
[v2,4/4] KVM: SVM: Support #GP handling for the case of nested on nested

Message ID 20210121065508.1169585-5-wei.huang2@amd.com (mailing list archive)
State New, archived
Series Handle #GP for SVM execution instructions

Commit Message

Wei Huang Jan. 21, 2021, 6:55 a.m. UTC
In the nested-on-nested case (e.g. L0->L1->L2->L3), a #GP triggered
by SVM instructions can be hidden from L1. Instead, the hypervisor
can inject the proper #VMEXIT to inform L1 of what is happening, so
L1 can avoid invoking the #GP workaround. For this reason, turn on
the guest VM's X86_FEATURE_SVME_ADDR_CHK bit so that KVM running
inside the VM receives the notification and changes its behavior.

Co-developed-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
---
 arch/x86/kvm/svm/svm.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

Comments

Maxim Levitsky Jan. 21, 2021, 2:09 p.m. UTC | #1
On Thu, 2021-01-21 at 01:55 -0500, Wei Huang wrote:
> In the nested-on-nested case (e.g. L0->L1->L2->L3), a #GP triggered
> by SVM instructions can be hidden from L1. Instead, the hypervisor
> can inject the proper #VMEXIT to inform L1 of what is happening, so
> L1 can avoid invoking the #GP workaround. For this reason, turn on
> the guest VM's X86_FEATURE_SVME_ADDR_CHK bit so that KVM running
> inside the VM receives the notification and changes its behavior.
> 
> Co-developed-by: Bandan Das <bsd@redhat.com>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> Signed-off-by: Wei Huang <wei.huang2@amd.com>
> ---
>  arch/x86/kvm/svm/svm.c | 19 ++++++++++++++++++-
>  1 file changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 2a12870ac71a..89512c0e7663 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -2196,6 +2196,11 @@ static int svm_instr_opcode(struct kvm_vcpu *vcpu)
>  
>  static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
>  {
> +	const int guest_mode_exit_codes[] = {
> +		[SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
> +		[SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
> +		[SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
> +	};
>  	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
>  		[SVM_INSTR_VMRUN] = vmrun_interception,
>  		[SVM_INSTR_VMLOAD] = vmload_interception,
> @@ -2203,7 +2208,14 @@ static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
>  	};
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> -	return svm_instr_handlers[opcode](svm);
> +	if (is_guest_mode(vcpu)) {
> +		svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
> +		svm->vmcb->control.exit_info_1 = 0;
> +		svm->vmcb->control.exit_info_2 = 0;
> +
> +		return nested_svm_vmexit(svm);
> +	} else
> +		return svm_instr_handlers[opcode](svm);
>  }
>  
>  /*
> @@ -4034,6 +4046,11 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
>  	/* Check again if INVPCID interception if required */
>  	svm_check_invpcid(svm);
>  
> +	if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM)) {
> +		best = kvm_find_cpuid_entry(vcpu, 0x8000000A, 0);
> +		best->edx |= (1 << 28);
> +	}
> +
>  	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
>  	if (sev_guest(vcpu->kvm)) {
>  		best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);

Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>


Best regards,
	Maxim Levitsky
Paolo Bonzini Jan. 21, 2021, 2:25 p.m. UTC | #2
On 21/01/21 07:55, Wei Huang wrote:
> +	if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM)) {
> +		best = kvm_find_cpuid_entry(vcpu, 0x8000000A, 0);
> +		best->edx |= (1 << 28);
> +	}
> +

Instead of this, please use kvm_cpu_cap_set in svm_set_cpu_caps's
"if (nested)".

Paolo
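
For reference, a minimal sketch of what this suggestion might look like
in svm_set_cpu_caps() (an illustrative fragment, not code from this
series; it assumes X86_FEATURE_SVME_ADDR_CHK names the CPUID
0x8000000A EDX[28] feature bit, as elsewhere in the series):

	static __init void svm_set_cpu_caps(void)
	{
		/* ... existing capability setup ... */

		if (nested) {
			kvm_cpu_cap_set(X86_FEATURE_SVM);

			/*
			 * KVM emulates the SVME address check for its guests
			 * even when the host CPU lacks it, so the bit can be
			 * advertised unconditionally when nested SVM is
			 * enabled.
			 */
			kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
		}
	}

Setting the bit in the per-vCPU CPUID entry, as the patch above does,
also relies on kvm_find_cpuid_entry() returning a non-NULL entry for
leaf 0x8000000A, whereas advertising the capability via
kvm_cpu_cap_set() sidesteps that concern.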
Dr. David Alan Gilbert Jan. 21, 2021, 2:56 p.m. UTC | #3
* Wei Huang (wei.huang2@amd.com) wrote:
> In the nested-on-nested case (e.g. L0->L1->L2->L3), a #GP triggered
> by SVM instructions can be hidden from L1. Instead, the hypervisor
> can inject the proper #VMEXIT to inform L1 of what is happening, so
> L1 can avoid invoking the #GP workaround. For this reason, turn on
> the guest VM's X86_FEATURE_SVME_ADDR_CHK bit so that KVM running
> inside the VM receives the notification and changes its behavior.

Doesn't this mean a VM migrated between levels (hmm, L2 to L1???) would
see different behaviour?
(I've never tried such a migration, but I thought in principle it should
work.)

Dave


Maxim Levitsky Jan. 21, 2021, 3:10 p.m. UTC | #4
On Thu, 2021-01-21 at 14:56 +0000, Dr. David Alan Gilbert wrote:
> * Wei Huang (wei.huang2@amd.com) wrote:
> > In the nested-on-nested case (e.g. L0->L1->L2->L3), a #GP triggered
> > by SVM instructions can be hidden from L1. Instead, the hypervisor
> > can inject the proper #VMEXIT to inform L1 of what is happening, so
> > L1 can avoid invoking the #GP workaround. For this reason, turn on
> > the guest VM's X86_FEATURE_SVME_ADDR_CHK bit so that KVM running
> > inside the VM receives the notification and changes its behavior.
> 
> Doesn't this mean a VM migrated between levels (hmm, L2 to L1???) would
> see different behaviour?
> (I've never tried such a migration, but I thought in principle it should
> work.)

This is not an issue. The VM will always see X86_FEATURE_SVME_ADDR_CHK
set, regardless of whether the host has it or KVM emulates it.
This is no different from what KVM does for the guest's x2apic:
KVM also always emulates it regardless of host support.

The hypervisor, on the other hand, can indeed see that bit either set
or clear, but it is prepared to handle both cases, so it will support
migrating VMs between hosts that do and don't have that bit.

I hope that I understand this correctly.

Best regards,
	Maxim Levitsky



Patch

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2a12870ac71a..89512c0e7663 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2196,6 +2196,11 @@  static int svm_instr_opcode(struct kvm_vcpu *vcpu)
 
 static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
 {
+	const int guest_mode_exit_codes[] = {
+		[SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
+		[SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
+		[SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
+	};
 	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
 		[SVM_INSTR_VMRUN] = vmrun_interception,
 		[SVM_INSTR_VMLOAD] = vmload_interception,
@@ -2203,7 +2208,14 @@  static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
 	};
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	return svm_instr_handlers[opcode](svm);
+	if (is_guest_mode(vcpu)) {
+		svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
+		svm->vmcb->control.exit_info_1 = 0;
+		svm->vmcb->control.exit_info_2 = 0;
+
+		return nested_svm_vmexit(svm);
+	} else
+		return svm_instr_handlers[opcode](svm);
 }
 
 /*
@@ -4034,6 +4046,11 @@  static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	/* Check again if INVPCID interception if required */
 	svm_check_invpcid(svm);
 
+	if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM)) {
+		best = kvm_find_cpuid_entry(vcpu, 0x8000000A, 0);
+		best->edx |= (1 << 28);
+	}
+
 	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
 	if (sev_guest(vcpu->kvm)) {
 		best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);