diff mbox series

[30/30] KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE

Message ID 20200529153934.11694-31-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: nSVM: event fixes and migration support | expand

Commit Message

Paolo Bonzini May 29, 2020, 3:39 p.m. UTC
Similar to VMX, the state that is captured through the currently available
IOCTLs is a mix of L1 and L2 state, dependent on whether the L2 guest was
running at the moment when the process was interrupted to save its state.

In particular, the SVM-specific state for nested virtualization includes
the L1 saved state (including the interrupt flag), the cached L2 controls,
and the GIF.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/kvm.h |  17 +++-
 arch/x86/kvm/cpuid.h            |   5 ++
 arch/x86/kvm/svm/nested.c       | 147 ++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c          |   1 +
 arch/x86/kvm/vmx/nested.c       |   5 --
 arch/x86/kvm/x86.c              |   3 +-
 6 files changed, 171 insertions(+), 7 deletions(-)

Comments

Krish Sadhukhan June 2, 2020, 12:11 a.m. UTC | #1
On 5/29/20 8:39 AM, Paolo Bonzini wrote:
> Similar to VMX, the state that is captured through the currently available
> IOCTLs is a mix of L1 and L2 state, dependent on whether the L2 guest was
> running at the moment when the process was interrupted to save its state.
>
> In particular, the SVM-specific state for nested virtualization includes
> the L1 saved state (including the interrupt flag), the cached L2 controls,
> and the GIF.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   arch/x86/include/uapi/asm/kvm.h |  17 +++-
>   arch/x86/kvm/cpuid.h            |   5 ++
>   arch/x86/kvm/svm/nested.c       | 147 ++++++++++++++++++++++++++++++++
>   arch/x86/kvm/svm/svm.c          |   1 +
>   arch/x86/kvm/vmx/nested.c       |   5 --
>   arch/x86/kvm/x86.c              |   3 +-
>   6 files changed, 171 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c65..12075a9de1c1 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -385,18 +385,22 @@ struct kvm_sync_regs {
>   #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
>   
>   #define KVM_STATE_NESTED_FORMAT_VMX	0
> -#define KVM_STATE_NESTED_FORMAT_SVM	1	/* unused */
> +#define KVM_STATE_NESTED_FORMAT_SVM	1
>   
>   #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
>   #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
>   #define KVM_STATE_NESTED_EVMCS		0x00000004
>   #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
> +#define KVM_STATE_NESTED_GIF_SET	0x00000100
>   
>   #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
>   #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
>   
>   #define KVM_STATE_NESTED_VMX_VMCS_SIZE	0x1000
>   
> +#define KVM_STATE_NESTED_SVM_VMCB_SIZE	0x1000
> +
> +
>   struct kvm_vmx_nested_state_data {
>   	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>   	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> @@ -411,6 +415,15 @@ struct kvm_vmx_nested_state_hdr {
>   	} smm;
>   };
>   
> +struct kvm_svm_nested_state_data {
> +	/* Save area only used if KVM_STATE_NESTED_RUN_PENDING.  */
> +	__u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
> +};
> +
> +struct kvm_svm_nested_state_hdr {
> +	__u64 vmcb_pa;
> +};
> +
>   /* for KVM_CAP_NESTED_STATE */
>   struct kvm_nested_state {
>   	__u16 flags;
> @@ -419,6 +432,7 @@ struct kvm_nested_state {
>   
>   	union {
>   		struct kvm_vmx_nested_state_hdr vmx;
> +		struct kvm_svm_nested_state_hdr svm;
>   
>   		/* Pad the header to 128 bytes.  */
>   		__u8 pad[120];
> @@ -431,6 +445,7 @@ struct kvm_nested_state {
>   	 */
>   	union {
>   		struct kvm_vmx_nested_state_data vmx[0];
> +		struct kvm_svm_nested_state_data svm[0];
>   	} data;
>   };
>   
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index 63a70f6a3df3..05434cd9342f 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -303,4 +303,9 @@ static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature)
>   		kvm_cpu_cap_set(x86_feature);
>   }
>   
> +static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
> +{
> +	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
> +}
> +
>   #endif
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index c712fe577029..6b1049148c1b 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -25,6 +25,7 @@
>   #include "trace.h"
>   #include "mmu.h"
>   #include "x86.h"
> +#include "cpuid.h"
>   #include "lapic.h"
>   #include "svm.h"
>   
> @@ -238,6 +239,8 @@ static void load_nested_vmcb_control(struct vcpu_svm *svm,
>   {
>   	copy_vmcb_control_area(&svm->nested.ctl, control);
>   
> +	/* Copy it here because nested_svm_check_controls will check it.  */
> +	svm->nested.ctl.asid           = control->asid;
>   	svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
>   	svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
>   }
> @@ -930,6 +933,150 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
>   	return NESTED_EXIT_CONTINUE;
>   }
>   
> +static int svm_get_nested_state(struct kvm_vcpu *vcpu,
> +				struct kvm_nested_state __user *user_kvm_nested_state,
> +				u32 user_data_size)
> +{
> +	struct vcpu_svm *svm;
> +	struct kvm_nested_state kvm_state = {
> +		.flags = 0,
> +		.format = KVM_STATE_NESTED_FORMAT_SVM,
> +		.size = sizeof(kvm_state),
> +	};
> +	struct vmcb __user *user_vmcb = (struct vmcb __user *)
> +		&user_kvm_nested_state->data.svm[0];
> +
> +	if (!vcpu)
> +		return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
> +
> +	svm = to_svm(vcpu);
> +
> +	if (user_data_size < kvm_state.size)
> +		goto out;
> +
> +	/* First fill in the header and copy it out.  */
> +	if (is_guest_mode(vcpu)) {
> +		kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb;
> +		kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
> +		kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
> +
> +		if (svm->nested.nested_run_pending)
> +			kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
> +	}
> +
> +	if (gif_set(svm))
> +		kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
> +
> +	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
> +		return -EFAULT;
> +
> +	if (!is_guest_mode(vcpu))
> +		goto out;
> +
> +	/*
> +	 * Copy over the full size of the VMCB rather than just the size
> +	 * of the structs.
> +	 */
> +	if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
> +		return -EFAULT;
> +	if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
> +			 sizeof(user_vmcb->control)))
> +		return -EFAULT;
> +	if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save,
> +			 sizeof(user_vmcb->save)))
> +		return -EFAULT;
> +
> +out:
> +	return kvm_state.size;
> +}
> +
> +static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> +				struct kvm_nested_state __user *user_kvm_nested_state,
> +				struct kvm_nested_state *kvm_state)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +	struct vmcb *hsave = svm->nested.hsave;
> +	struct vmcb __user *user_vmcb = (struct vmcb __user *)
> +		&user_kvm_nested_state->data.svm[0];
> +	struct vmcb_control_area ctl;
> +	struct vmcb_save_area save;
> +	u32 cr0;
> +
> +	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
> +		return -EINVAL;
> +
> +	if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
> +				 KVM_STATE_NESTED_RUN_PENDING |
> +				 KVM_STATE_NESTED_GIF_SET))
> +		return -EINVAL;
> +
> +	/*
> +	 * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
> +	 * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
> +	 */
> +	if (!(vcpu->arch.efer & EFER_SVME)) {
> +		/* GIF=1 and no guest mode are required if SVME=0.  */
> +		if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
> +			return -EINVAL;
> +	}
> +
> +	/* SMM temporarily disables SVM, so we cannot be in guest mode.  */
> +	if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
> +		return -EINVAL;
> +
> +	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {


Should this be done up at the beginning of the function ? If this flag 
isn't set, we probably don't want to come this far.

> +		svm_leave_nested(svm);
> +		goto out_set_gif;
> +	}
> +
> +	if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
> +		return -EINVAL;
> +	if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
> +		return -EINVAL;
> +	if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl)))
> +		return -EFAULT;
> +	if (copy_from_user(&save, &user_vmcb->save, sizeof(save)))
> +		return -EFAULT;
> +
> +	if (!nested_vmcb_check_controls(&ctl))
> +		return -EINVAL;
> +
> +	/*
> +	 * Processor state contains L2 state.  Check that it is
> +	 * valid for guest mode (see nested_vmcb_checks).
> +	 */
> +	cr0 = kvm_read_cr0(vcpu);
> +        if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
> +                return -EINVAL;


Does it make sense to create a wrapper for the CR0 checks ? We have 
these checks in nested_vmcb_check_controls() also.

> +
> +	/*
> +	 * Validate host state saved from before VMRUN (see
> +	 * nested_svm_check_permissions).
> +	 * TODO: validate reserved bits for all saved state.
> +	 */
> +	if (!(save.cr0 & X86_CR0_PG))
> +		return -EINVAL;
> +
> +	/*
> +	 * All checks done, we can enter guest mode.  L1 control fields
> +	 * come from the nested save state.  Guest state is already
> +	 * in the registers, the save area of the nested state instead
> +	 * contains saved L1 state.
> +	 */
> +	copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
> +	hsave->save = save;
> +
> +	svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa;
> +	load_nested_vmcb_control(svm, &ctl);
> +	nested_prepare_vmcb_control(svm);
> +
> +out_set_gif:
> +	svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
> +	return 0;
> +}
> +
>   struct kvm_x86_nested_ops svm_nested_ops = {
>   	.check_events = svm_check_nested_events,
> +	.get_state = svm_get_nested_state,
> +	.set_state = svm_set_nested_state,
>   };
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index b4db9a980469..3871bfb40594 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1193,6 +1193,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
>   		svm->avic_is_running = true;
>   
>   	svm->nested.hsave = page_address(hsave_page);
> +	clear_page(svm->nested.hsave);
>   
>   	svm->msrpm = page_address(msrpm_pages);
>   	svm_vcpu_init_msrpm(svm->msrpm);
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 51ebb60e1533..106fc6fceb97 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -437,11 +437,6 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
>   	}
>   }
>   
> -static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
> -{
> -	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
> -}
> -
>   static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
>   					       struct vmcs12 *vmcs12)
>   {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f0fa610bed91..d4aa7dc662d5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4628,7 +4628,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>   
>   		if (kvm_state.flags &
>   		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
> -		      | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING))
> +		      | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
> +		      | KVM_STATE_NESTED_GIF_SET))
>   			break;
>   
>   		/* nested_run_pending implies guest_mode.  */
Paolo Bonzini June 4, 2020, 2:47 p.m. UTC | #2
Sorry I missed this.

On 02/06/20 02:11, Krish Sadhukhan wrote:
>>
>> +
>> +    /* SMM temporarily disables SVM, so we cannot be in guest mode.  */
>> +    if (is_smm(vcpu) && (kvm_state->flags &
>> KVM_STATE_NESTED_GUEST_MODE))
>> +        return -EINVAL;
>> +
>> +    if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
> 
> 
> Should this be done up at the beginning of the function ? If this flag
> isn't set, we probably don't want to come this far.

So far we have only done consistency checks.  These have to be done no
matter what (for example checking that GIF=1 if SVME=0).

>> +        svm_leave_nested(svm);
>> +        goto out_set_gif;
>> +    }
>> +
>> +    if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
>> +        return -EINVAL;
>> +    if (kvm_state->size < sizeof(*kvm_state) +
>> KVM_STATE_NESTED_SVM_VMCB_SIZE)
>> +        return -EINVAL;
>> +    if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl)))
>> +        return -EFAULT;
>> +    if (copy_from_user(&save, &user_vmcb->save, sizeof(save)))
>> +        return -EFAULT;
>> +
>> +    if (!nested_vmcb_check_controls(&ctl))
>> +        return -EINVAL;
>> +
>> +    /*
>> +     * Processor state contains L2 state.  Check that it is
>> +     * valid for guest mode (see nested_vmcb_checks).
>> +     */
>> +    cr0 = kvm_read_cr0(vcpu);
>> +        if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
>> +                return -EINVAL;
> 
> 
> Does it make sense to create a wrapper for the CR0 checks ? We have
> these checks in nested_vmcb_check_controls() also.

Not in nested_vmcb_check_controls (rather nested_vmcb_checks as
mentioned in the comments).

If there are more checks it certainly makes sense to have them.  Right
now however there are only two checks in svm_set_nested_state, and they
come from two different functions so I chose to duplicate them.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c65..12075a9de1c1 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -385,18 +385,22 @@  struct kvm_sync_regs {
 #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
 
 #define KVM_STATE_NESTED_FORMAT_VMX	0
-#define KVM_STATE_NESTED_FORMAT_SVM	1	/* unused */
+#define KVM_STATE_NESTED_FORMAT_SVM	1
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
 #define KVM_STATE_NESTED_EVMCS		0x00000004
 #define KVM_STATE_NESTED_MTF_PENDING	0x00000008
+#define KVM_STATE_NESTED_GIF_SET	0x00000100
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
 
 #define KVM_STATE_NESTED_VMX_VMCS_SIZE	0x1000
 
+#define KVM_STATE_NESTED_SVM_VMCB_SIZE	0x1000
+
+
 struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -411,6 +415,15 @@  struct kvm_vmx_nested_state_hdr {
 	} smm;
 };
 
+struct kvm_svm_nested_state_data {
+	/* Save area only used if KVM_STATE_NESTED_RUN_PENDING.  */
+	__u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE];
+};
+
+struct kvm_svm_nested_state_hdr {
+	__u64 vmcb_pa;
+};
+
 /* for KVM_CAP_NESTED_STATE */
 struct kvm_nested_state {
 	__u16 flags;
@@ -419,6 +432,7 @@  struct kvm_nested_state {
 
 	union {
 		struct kvm_vmx_nested_state_hdr vmx;
+		struct kvm_svm_nested_state_hdr svm;
 
 		/* Pad the header to 128 bytes.  */
 		__u8 pad[120];
@@ -431,6 +445,7 @@  struct kvm_nested_state {
 	 */
 	union {
 		struct kvm_vmx_nested_state_data vmx[0];
+		struct kvm_svm_nested_state_data svm[0];
 	} data;
 };
 
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 63a70f6a3df3..05434cd9342f 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -303,4 +303,9 @@  static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature)
 		kvm_cpu_cap_set(x86_feature);
 }
 
+static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
 #endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c712fe577029..6b1049148c1b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,7 @@ 
 #include "trace.h"
 #include "mmu.h"
 #include "x86.h"
+#include "cpuid.h"
 #include "lapic.h"
 #include "svm.h"
 
@@ -238,6 +239,8 @@  static void load_nested_vmcb_control(struct vcpu_svm *svm,
 {
 	copy_vmcb_control_area(&svm->nested.ctl, control);
 
+	/* Copy it here because nested_svm_check_controls will check it.  */
+	svm->nested.ctl.asid           = control->asid;
 	svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
 	svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
 }
@@ -930,6 +933,150 @@  int nested_svm_exit_special(struct vcpu_svm *svm)
 	return NESTED_EXIT_CONTINUE;
 }
 
+static int svm_get_nested_state(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				u32 user_data_size)
+{
+	struct vcpu_svm *svm;
+	struct kvm_nested_state kvm_state = {
+		.flags = 0,
+		.format = KVM_STATE_NESTED_FORMAT_SVM,
+		.size = sizeof(kvm_state),
+	};
+	struct vmcb __user *user_vmcb = (struct vmcb __user *)
+		&user_kvm_nested_state->data.svm[0];
+
+	if (!vcpu)
+		return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
+
+	svm = to_svm(vcpu);
+
+	if (user_data_size < kvm_state.size)
+		goto out;
+
+	/* First fill in the header and copy it out.  */
+	if (is_guest_mode(vcpu)) {
+		kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb;
+		kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
+		kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
+
+		if (svm->nested.nested_run_pending)
+			kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+	}
+
+	if (gif_set(svm))
+		kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
+
+	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
+		return -EFAULT;
+
+	if (!is_guest_mode(vcpu))
+		goto out;
+
+	/*
+	 * Copy over the full size of the VMCB rather than just the size
+	 * of the structs.
+	 */
+	if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
+		return -EFAULT;
+	if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
+			 sizeof(user_vmcb->control)))
+		return -EFAULT;
+	if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save,
+			 sizeof(user_vmcb->save)))
+		return -EFAULT;
+
+out:
+	return kvm_state.size;
+}
+
+static int svm_set_nested_state(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				struct kvm_nested_state *kvm_state)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *hsave = svm->nested.hsave;
+	struct vmcb __user *user_vmcb = (struct vmcb __user *)
+		&user_kvm_nested_state->data.svm[0];
+	struct vmcb_control_area ctl;
+	struct vmcb_save_area save;
+	u32 cr0;
+
+	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
+		return -EINVAL;
+
+	if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
+				 KVM_STATE_NESTED_RUN_PENDING |
+				 KVM_STATE_NESTED_GIF_SET))
+		return -EINVAL;
+
+	/*
+	 * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
+	 * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
+	 */
+	if (!(vcpu->arch.efer & EFER_SVME)) {
+		/* GIF=1 and no guest mode are required if SVME=0.  */
+		if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
+			return -EINVAL;
+	}
+
+	/* SMM temporarily disables SVM, so we cannot be in guest mode.  */
+	if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+		return -EINVAL;
+
+	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
+		svm_leave_nested(svm);
+		goto out_set_gif;
+	}
+
+	if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
+		return -EINVAL;
+	if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
+		return -EINVAL;
+	if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl)))
+		return -EFAULT;
+	if (copy_from_user(&save, &user_vmcb->save, sizeof(save)))
+		return -EFAULT;
+
+	if (!nested_vmcb_check_controls(&ctl))
+		return -EINVAL;
+
+	/*
+	 * Processor state contains L2 state.  Check that it is
+	 * valid for guest mode (see nested_vmcb_checks).
+	 */
+	cr0 = kvm_read_cr0(vcpu);
+        if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
+                return -EINVAL;
+
+	/*
+	 * Validate host state saved from before VMRUN (see
+	 * nested_svm_check_permissions).
+	 * TODO: validate reserved bits for all saved state.
+	 */
+	if (!(save.cr0 & X86_CR0_PG))
+		return -EINVAL;
+
+	/*
+	 * All checks done, we can enter guest mode.  L1 control fields
+	 * come from the nested save state.  Guest state is already
+	 * in the registers, the save area of the nested state instead
+	 * contains saved L1 state.
+	 */
+	copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
+	hsave->save = save;
+
+	svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa;
+	load_nested_vmcb_control(svm, &ctl);
+	nested_prepare_vmcb_control(svm);
+
+out_set_gif:
+	svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+	return 0;
+}
+
 struct kvm_x86_nested_ops svm_nested_ops = {
 	.check_events = svm_check_nested_events,
+	.get_state = svm_get_nested_state,
+	.set_state = svm_set_nested_state,
 };
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b4db9a980469..3871bfb40594 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1193,6 +1193,7 @@  static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 		svm->avic_is_running = true;
 
 	svm->nested.hsave = page_address(hsave_page);
+	clear_page(svm->nested.hsave);
 
 	svm->msrpm = page_address(msrpm_pages);
 	svm_vcpu_init_msrpm(svm->msrpm);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 51ebb60e1533..106fc6fceb97 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -437,11 +437,6 @@  static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 	}
 }
 
-static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
-	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
-}
-
 static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
 					       struct vmcs12 *vmcs12)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f0fa610bed91..d4aa7dc662d5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4628,7 +4628,8 @@  long kvm_arch_vcpu_ioctl(struct file *filp,
 
 		if (kvm_state.flags &
 		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
-		      | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING))
+		      | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
+		      | KVM_STATE_NESTED_GIF_SET))
 			break;
 
 		/* nested_run_pending implies guest_mode.  */