
[3/3] KVM, SEV: Add support for SEV-ES local migration

Message ID 20210621163118.1040170-4-pgonda@google.com (mailing list archive)
State New, archived
Series Add AMD SEV and SEV-ES local migration support

Commit Message

Peter Gonda June 21, 2021, 4:31 p.m. UTC
Local migration provides a low-cost mechanism for userspace VMM upgrades.
It is an alternative to traditional (i.e., remote) live migration. Whereas
remote migration handles moving a guest to a new host, local migration
only handles moving a guest to a new userspace VMM within the same host.

For SEV-ES to work with local migration, the VMSAs, GHCB metadata,
and other SEV-ES state need to be preserved along with the guest's
memory. KVM maintains a pointer to each vCPU's GHCB and may additionally
hold a copy of the GHCB's save area if the guest has been using it
for NAE handling. The local send and receive ioctls have been updated to
move this additional per-vCPU SEV-ES metadata into the hashmap for
SEV local migration data.
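
As a rough sketch, userspace drives this via the usual
KVM_MEMORY_ENCRYPT_OP flow (the KVM_SEV_LOCAL_SEND and
KVM_SEV_LOCAL_RECEIVE commands and their argument structs come from
earlier patches in this series; the field names below are illustrative
only, not the exact UAPI):

	struct kvm_sev_local_send send_info = {};	/* illustrative */
	struct kvm_sev_local_receive recv_info = {};	/* illustrative */
	struct kvm_sev_cmd cmd = {
		.id = KVM_SEV_LOCAL_SEND,
		.data = (__u64)(unsigned long)&send_info,
	};

	/* Source VMM: stash the SEV(-ES) state under a fresh token. */
	ioctl(source_vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);

	/* Pass send_info.info_token to the new VMM process, then: */
	recv_info.info_token = send_info.info_token;	/* illustrative */
	cmd.id = KVM_SEV_LOCAL_RECEIVE;
	cmd.data = (__u64)(unsigned long)&recv_info;
	ioctl(target_vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);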

Signed-off-by: Peter Gonda <pgonda@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Jim Mattson <jmattson@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

---
 arch/x86/kvm/svm/sev.c | 164 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 150 insertions(+), 14 deletions(-)

Comments

Brijesh Singh July 13, 2021, 6:41 p.m. UTC | #1
On 6/21/21 11:31 AM, Peter Gonda wrote:

> @@ -1196,8 +1299,19 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>   	INIT_LIST_HEAD(&entry->regions_list);
>   	list_replace_init(&sev->regions_list, &entry->regions_list);
>   
> +	if (sev_es_guest(kvm)) {
> +		/*
> +		 * If this is an ES guest, we need to move each VMCB's VMSA into a
> +		 * list for migration.
> +		 */
> +		entry->es_enabled = true;
> +		entry->ap_jump_table = sev->ap_jump_table;
> +		if (create_vmsa_list(kvm, entry))
> +			goto e_listdel;
> +	}
> +

The patch looks good. Similar to the previous patch, do we need to check
for the SEV guest state >= LAUNCH_UPDATE to be sure that the VMSAs are
encrypted before we go about sharing them with the new VMM?
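
Something like the below (untested, just a sketch) is what I have in
mind, keying off the fact that LAUNCH_UPDATE_VMSA is where KVM sets
guest_state_protected:

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!vcpu->arch.guest_state_protected)
			return -EINVAL;
	}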

-Brijesh
Sean Christopherson July 13, 2021, 10:21 p.m. UTC | #2
On Mon, Jun 21, 2021, Peter Gonda wrote:
> +static int process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
> +{
> +	struct vmsa_node *vmsa_node, *q;
> +	struct kvm_vcpu *vcpu;
> +	struct vcpu_svm *svm;
> +
> +	lockdep_assert_held(&kvm->lock);
> +
> +	if (!vmsa_list)

This is pointless, all callers pass in a list, i.e. it's mandatory.

> +		return 0;
> +
> +	list_for_each_entry(vmsa_node, vmsa_list, list) {
> +		if (!kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id)) {
> +			WARN(1,
> +			     "Failed to find VCPU with ID %d despite presence in VMSA list.\n",
> +			     vmsa_node->vcpu_id);
> +			return -1;
> +		}
> +	}
> +
> +	/*
> +	 * Move any stashed VMSAs back to their respective VMCBs and delete
> +	 * those nodes.
> +	 */
> +	list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
> +		vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);

Barring a KVM bug, is it even theoretically possible for vcpu to be NULL?  If not,
I'd simply drop the above sanity check.  If this can only be true if there's a
KVM bug and you really want to keep the WARN, just do:

		if (WARN_ON(!vcpu))
			continue;

since a KVM bug this egregious means all bets are off anyways.  That should also
allow you to make this a void-returning helper and avoid pointless checking.
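
E.g. the helper then collapses down to something like (completely
untested):

	static void process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
	{
		struct vmsa_node *vmsa_node, *q;
		struct kvm_vcpu *vcpu;
		struct vcpu_svm *svm;

		lockdep_assert_held(&kvm->lock);

		/*
		 * Move any stashed VMSAs back to their respective VMCBs and
		 * delete those nodes.
		 */
		list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
			vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);
			if (WARN_ON(!vcpu))
				continue;

			svm = to_svm(vcpu);
			svm->vmsa = vmsa_node->vmsa;
			svm->ghcb = vmsa_node->ghcb;
			svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
			svm->vcpu.arch.guest_state_protected = true;
			svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
			svm->ghcb_sa = vmsa_node->ghcb_sa;
			svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
			svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
			svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;

			list_del(&vmsa_node->list);
			kfree(vmsa_node);
		}
	}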

> +		svm = to_svm(vcpu);
> +		svm->vmsa = vmsa_node->vmsa;
> +		svm->ghcb = vmsa_node->ghcb;
> +		svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
> +		svm->vcpu.arch.guest_state_protected = true;
> +		svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
> +		svm->ghcb_sa = vmsa_node->ghcb_sa;
> +		svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
> +		svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
> +		svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;
> +
> +		list_del(&vmsa_node->list);
> +		kfree(vmsa_node);
> +	}
> +
> +	return 0;
> +}
> +
> +static int create_vmsa_list(struct kvm *kvm,
> +			    struct sev_info_migration_node *entry)
> +{
> +	int i;
> +	const int num_vcpus = atomic_read(&kvm->online_vcpus);
> +	struct vmsa_node *node;
> +	struct kvm_vcpu *vcpu;
> +	struct vcpu_svm *svm;
> +
> +	INIT_LIST_HEAD(&entry->vmsa_list);
> +	for (i = 0; i < num_vcpus; ++i) {
> +		node = kzalloc(sizeof(*node), GFP_KERNEL);
> +		if (!node)
> +			goto e_freelist;
> +
> +		vcpu = kvm->vcpus[i];
> +		node->vcpu_id = vcpu->vcpu_id;
> +
> +		svm = to_svm(vcpu);
> +		node->vmsa = svm->vmsa;
> +		svm->vmsa = NULL;
> +		node->ghcb = svm->ghcb;
> +		svm->ghcb = NULL;
> +		node->ghcb_gpa = svm->vmcb->control.ghcb_gpa;
> +		node->ghcb_sa = svm->ghcb_sa;
> +		svm->ghcb_sa = NULL;
> +		node->ghcb_sa_len = svm->ghcb_sa_len;
> +		svm->ghcb_sa_len = 0;
> +		node->ghcb_sa_sync = svm->ghcb_sa_sync;
> +		svm->ghcb_sa_sync = false;
> +		node->ghcb_sa_free = svm->ghcb_sa_free;
> +		svm->ghcb_sa_free = false;
> +
> +		list_add_tail(&node->list, &entry->vmsa_list);
> +	}
> +
> +	return 0;
> +
> +e_freelist:
> +	if (process_vmsa_list(kvm, &entry->vmsa_list))
> +		WARN(1, "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Same comments about e_freelist and using WARN_ON().  Though if process_vmsa_list()
can't return an error, this goes away entirely.
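
E.g. with a void-returning process_vmsa_list(), the error path here is
just (untested):

	e_freelist:
		process_vmsa_list(kvm, &entry->vmsa_list);
		return -1;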

> +	return -1;
> +}
> +
>  static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1174,9 +1280,6 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	if (!sev_guest(kvm))
>  		return -ENOTTY;
>  
> -	if (sev->es_active)
> -		return -EPERM;
> -
>  	if (sev->info_token != 0)
>  		return -EEXIST;
>  
> @@ -1196,8 +1299,19 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	INIT_LIST_HEAD(&entry->regions_list);
>  	list_replace_init(&sev->regions_list, &entry->regions_list);
>  
> +	if (sev_es_guest(kvm)) {
> +		/*
> +		 * If this is an ES guest, we need to move each VMCB's VMSA into a
> +		 * list for migration.
> +		 */
> +		entry->es_enabled = true;
> +		entry->ap_jump_table = sev->ap_jump_table;
> +		if (create_vmsa_list(kvm, entry))
> +			goto e_listdel;
> +	}
> +
>  	if (place_migration_node(entry))
> -		goto e_listdel;
> +		goto e_vmsadel;
>  
>  	token = entry->token;
>  
> @@ -1215,6 +1329,11 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	hash_del(&entry->hnode);
>  	spin_unlock(&sev_info_migration_hash_lock);
>  
> +e_vmsadel:
> +	if (sev_es_guest(kvm) && process_vmsa_list(kvm, &entry->vmsa_list))
> +		WARN(1,
> +		     "Unable to move VMSA list back to source VM. Guest is in a broken state now.");

Guess what today's Final Jeopardy answer is? :-D

> +
>  e_listdel:
>  	list_replace_init(&entry->regions_list, &sev->regions_list);
>  
> @@ -1233,9 +1352,6 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	if (!sev_guest(kvm))
>  		return -ENOTTY;
>  
> -	if (sev->es_active)
> -		return -EPERM;
> -
>  	if (sev->handle != 0)
>  		return -EPERM;
>  
> @@ -1254,6 +1370,14 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  
>  	memcpy(&old_info, sev, sizeof(old_info));
>  
> +	if (entry->es_enabled) {
> +		if (process_vmsa_list(kvm, &entry->vmsa_list))
> +			goto err_unlock;
> +
> +		sev->es_active = true;
> +		sev->ap_jump_table = entry->ap_jump_table;
> +	}
> +
>  	/*
>  	 * The source VM always frees @entry On the target we simply
>  	 * mark the token as invalid to notify the source the sev info
> @@ -2046,12 +2170,22 @@ void sev_vm_destroy(struct kvm *kvm)
>  		__unregister_region_list_locked(kvm, &sev->regions_list);
>  	}
>  
> -	/*
> -	 * If userspace was terminated before unregistering the memory
> -	 * regions then lets unpin all the registered memory.
> -	 */
> -	if (entry)
> +	if (entry) {
> +		/*
> +		 * If there are any saved VMSAs, restore them so they can be
> +		 * destroyed through the normal path.
> +		 */
> +		if (entry->es_enabled)
> +			if (process_vmsa_list(kvm, &entry->vmsa_list))
> +				WARN(1,
> +				     "Unable to clean up vmsa_list");

More code that can be zapped if process_vmsa_list() is less of a zealot.
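
E.g. (untested):

	if (entry) {
		/* Restore stashed VMSAs so they're freed via the normal path. */
		if (entry->es_enabled)
			process_vmsa_list(kvm, &entry->vmsa_list);

		/*
		 * If userspace was terminated before unregistering the memory
		 * regions then let's unpin all the registered memory.
		 */
		__unregister_region_list_locked(kvm, &entry->regions_list);
	}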

> +
> +		/*
> +		 * If userspace was terminated before unregistering the memory
> +	 * regions then let's unpin all the registered memory.
> +		 */
>  		__unregister_region_list_locked(kvm, &entry->regions_list);
> +	}
>  
>  	mutex_unlock(&kvm->lock);
>  
> @@ -2243,9 +2377,11 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>  
>  	svm = to_svm(vcpu);
>  
> -	if (vcpu->arch.guest_state_protected)
> +	if (svm->ghcb && vcpu->arch.guest_state_protected)
>  		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
> -	__free_page(virt_to_page(svm->vmsa));
> +
> +	if (svm->vmsa)
> +		__free_page(virt_to_page(svm->vmsa));
>  
>  	if (svm->ghcb_sa_free)
>  		kfree(svm->ghcb_sa);
> -- 
> 2.32.0.288.g62a8d224e6-goog
>
Marc Orr July 13, 2021, 11:25 p.m. UTC | #3
On Mon, Jun 21, 2021 at 9:59 AM Peter Gonda <pgonda@google.com> wrote:
>
> Local migration provides a low-cost mechanism for userspace VMM upgrades.
> It is an alternative to traditional (i.e., remote) live migration. Whereas
> remote migration handles moving a guest to a new host, local migration
> only handles moving a guest to a new userspace VMM within the same host.
>
> For SEV-ES to work with local migration, the VMSAs, GHCB metadata,
> and other SEV-ES state need to be preserved along with the guest's
> memory. KVM maintains a pointer to each vCPU's GHCB and may additionally
> hold a copy of the GHCB's save area if the guest has been using it
> for NAE handling. The local send and receive ioctls have been updated to
> move this additional per-vCPU SEV-ES metadata into the hashmap for
> SEV local migration data.
>
> Signed-off-by: Peter Gonda <pgonda@google.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Sean Christopherson <seanjc@google.com>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Cc: Brijesh Singh <brijesh.singh@amd.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Cc: Wanpeng Li <wanpengli@tencent.com>
> Cc: Jim Mattson <jmattson@google.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: kvm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
>
> ---
>  arch/x86/kvm/svm/sev.c | 164 +++++++++++++++++++++++++++++++++++++----
>  1 file changed, 150 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 7c33ad2b910d..33df7ed08d21 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -77,6 +77,19 @@ struct enc_region {
>         unsigned long size;
>  };
>
> +struct vmsa_node {
> +       struct list_head list;
> +       int vcpu_id;
> +       struct vmcb_save_area *vmsa;
> +       struct ghcb *ghcb;
> +       u64 ghcb_gpa;
> +
> +       void *ghcb_sa;
> +       u64 ghcb_sa_len;
> +       bool ghcb_sa_sync;
> +       bool ghcb_sa_free;
> +};
> +
>  struct sev_info_migration_node {
>         struct hlist_node hnode;
>         u64 token;
> @@ -87,6 +100,11 @@ struct sev_info_migration_node {
>         unsigned long pages_locked;
>         struct list_head regions_list;
>         struct misc_cg *misc_cg;
> +
> +       /* The following fields are for SEV-ES guests */
> +       bool es_enabled;
> +       struct list_head vmsa_list;
> +       u64 ap_jump_table;
>  };
>
>  #define SEV_INFO_MIGRATION_HASH_BITS    7
> @@ -1163,6 +1181,94 @@ static int place_migration_node(struct sev_info_migration_node *entry)
>         return ret;
>  }
>
> +static int process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
> +{
> +       struct vmsa_node *vmsa_node, *q;
> +       struct kvm_vcpu *vcpu;
> +       struct vcpu_svm *svm;
> +
> +       lockdep_assert_held(&kvm->lock);
> +
> +       if (!vmsa_list)
> +               return 0;
> +
> +       list_for_each_entry(vmsa_node, vmsa_list, list) {
> +               if (!kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id)) {
> +                       WARN(1,
> +                            "Failed to find VCPU with ID %d despite presence in VMSA list.\n",
> +                            vmsa_node->vcpu_id);
> +                       return -1;
> +               }
> +       }
> +
> +       /*
> +        * Move any stashed VMSAs back to their respective VMCBs and delete
> +        * those nodes.
> +        */
> +       list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
> +               vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);
> +               svm = to_svm(vcpu);
> +               svm->vmsa = vmsa_node->vmsa;
> +               svm->ghcb = vmsa_node->ghcb;
> +               svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
> +               svm->vcpu.arch.guest_state_protected = true;
> +               svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
> +               svm->ghcb_sa = vmsa_node->ghcb_sa;
> +               svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
> +               svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
> +               svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;
> +
> +               list_del(&vmsa_node->list);
> +               kfree(vmsa_node);
> +       }
> +
> +       return 0;
> +}
> +
> +static int create_vmsa_list(struct kvm *kvm,
> +                           struct sev_info_migration_node *entry)
> +{
> +       int i;
> +       const int num_vcpus = atomic_read(&kvm->online_vcpus);
> +       struct vmsa_node *node;
> +       struct kvm_vcpu *vcpu;
> +       struct vcpu_svm *svm;
> +
> +       INIT_LIST_HEAD(&entry->vmsa_list);
> +       for (i = 0; i < num_vcpus; ++i) {
> +               node = kzalloc(sizeof(*node), GFP_KERNEL);
> +               if (!node)
> +                       goto e_freelist;
> +
> +               vcpu = kvm->vcpus[i];
> +               node->vcpu_id = vcpu->vcpu_id;
> +
> +               svm = to_svm(vcpu);
> +               node->vmsa = svm->vmsa;
> +               svm->vmsa = NULL;
> +               node->ghcb = svm->ghcb;
> +               svm->ghcb = NULL;
> +               node->ghcb_gpa = svm->vmcb->control.ghcb_gpa;
> +               node->ghcb_sa = svm->ghcb_sa;
> +               svm->ghcb_sa = NULL;
> +               node->ghcb_sa_len = svm->ghcb_sa_len;
> +               svm->ghcb_sa_len = 0;
> +               node->ghcb_sa_sync = svm->ghcb_sa_sync;
> +               svm->ghcb_sa_sync = false;
> +               node->ghcb_sa_free = svm->ghcb_sa_free;
> +               svm->ghcb_sa_free = false;
> +
> +               list_add_tail(&node->list, &entry->vmsa_list);
> +       }
> +
> +       return 0;
> +
> +e_freelist:
> +       if (process_vmsa_list(kvm, &entry->vmsa_list))
> +               WARN(1, "Unable to move VMSA list back to source VM. Guest is in a broken state now.");
> +       return -1;
> +}
> +
>  static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
>         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1174,9 +1280,6 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>         if (!sev_guest(kvm))
>                 return -ENOTTY;
>
> -       if (sev->es_active)
> -               return -EPERM;
> -
>         if (sev->info_token != 0)
>                 return -EEXIST;
>
> @@ -1196,8 +1299,19 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>         INIT_LIST_HEAD(&entry->regions_list);
>         list_replace_init(&sev->regions_list, &entry->regions_list);
>
> +       if (sev_es_guest(kvm)) {
> +               /*
> +                * If this is an ES guest, we need to move each VMCB's VMSA into a
> +                * list for migration.
> +                */
> +               entry->es_enabled = true;
> +               entry->ap_jump_table = sev->ap_jump_table;
> +               if (create_vmsa_list(kvm, entry))
> +                       goto e_listdel;
> +       }
> +
>         if (place_migration_node(entry))
> -               goto e_listdel;
> +               goto e_vmsadel;
>
>         token = entry->token;
>
> @@ -1215,6 +1329,11 @@ static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
>         hash_del(&entry->hnode);
>         spin_unlock(&sev_info_migration_hash_lock);
>
> +e_vmsadel:
> +       if (sev_es_guest(kvm) && process_vmsa_list(kvm, &entry->vmsa_list))
> +               WARN(1,
> +                    "Unable to move VMSA list back to source VM. Guest is in a broken state now.");
> +
>  e_listdel:
>         list_replace_init(&entry->regions_list, &sev->regions_list);
>
> @@ -1233,9 +1352,6 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>         if (!sev_guest(kvm))
>                 return -ENOTTY;
>
> -       if (sev->es_active)
> -               return -EPERM;
> -
>         if (sev->handle != 0)
>                 return -EPERM;
>
> @@ -1254,6 +1370,14 @@ static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
>
>         memcpy(&old_info, sev, sizeof(old_info));
>
> +       if (entry->es_enabled) {
> +               if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                       goto err_unlock;
> +
> +               sev->es_active = true;
> +               sev->ap_jump_table = entry->ap_jump_table;
> +       }
> +
>         /*
>          * The source VM always frees @entry On the target we simply
>          * mark the token as invalid to notify the source the sev info
> @@ -2046,12 +2170,22 @@ void sev_vm_destroy(struct kvm *kvm)
>                 __unregister_region_list_locked(kvm, &sev->regions_list);
>         }
>
> -       /*
> -        * If userspace was terminated before unregistering the memory
> -        * regions then lets unpin all the registered memory.
> -        */
> -       if (entry)
> +       if (entry) {
> +               /*
> +                * If there are any saved VMSAs, restore them so they can be
> +                * destroyed through the normal path.
> +                */
> +               if (entry->es_enabled)
> +                       if (process_vmsa_list(kvm, &entry->vmsa_list))
> +                               WARN(1,
> +                                    "Unable to clean up vmsa_list");
> +
> +               /*
> +                * If userspace was terminated before unregistering the memory
> +        * regions then let's unpin all the registered memory.
> +                */
>                 __unregister_region_list_locked(kvm, &entry->regions_list);
> +       }
>
>         mutex_unlock(&kvm->lock);
>
> @@ -2243,9 +2377,11 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>
>         svm = to_svm(vcpu);
>
> -       if (vcpu->arch.guest_state_protected)
> +       if (svm->ghcb && vcpu->arch.guest_state_protected)
>                 sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
> -       __free_page(virt_to_page(svm->vmsa));
> +
> +       if (svm->vmsa)
> +               __free_page(virt_to_page(svm->vmsa));
>
>         if (svm->ghcb_sa_free)
>                 kfree(svm->ghcb_sa);
> --
> 2.32.0.288.g62a8d224e6-goog
>

Reviewed-by: Marc Orr <marcorr@google.com>

Patch

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7c33ad2b910d..33df7ed08d21 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -77,6 +77,19 @@  struct enc_region {
 	unsigned long size;
 };
 
+struct vmsa_node {
+	struct list_head list;
+	int vcpu_id;
+	struct vmcb_save_area *vmsa;
+	struct ghcb *ghcb;
+	u64 ghcb_gpa;
+
+	void *ghcb_sa;
+	u64 ghcb_sa_len;
+	bool ghcb_sa_sync;
+	bool ghcb_sa_free;
+};
+
 struct sev_info_migration_node {
 	struct hlist_node hnode;
 	u64 token;
@@ -87,6 +100,11 @@  struct sev_info_migration_node {
 	unsigned long pages_locked;
 	struct list_head regions_list;
 	struct misc_cg *misc_cg;
+
+	/* The following fields are for SEV-ES guests */
+	bool es_enabled;
+	struct list_head vmsa_list;
+	u64 ap_jump_table;
 };
 
 #define SEV_INFO_MIGRATION_HASH_BITS    7
@@ -1163,6 +1181,94 @@  static int place_migration_node(struct sev_info_migration_node *entry)
 	return ret;
 }
 
+static int process_vmsa_list(struct kvm *kvm, struct list_head *vmsa_list)
+{
+	struct vmsa_node *vmsa_node, *q;
+	struct kvm_vcpu *vcpu;
+	struct vcpu_svm *svm;
+
+	lockdep_assert_held(&kvm->lock);
+
+	if (!vmsa_list)
+		return 0;
+
+	list_for_each_entry(vmsa_node, vmsa_list, list) {
+		if (!kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id)) {
+			WARN(1,
+			     "Failed to find VCPU with ID %d despite presence in VMSA list.\n",
+			     vmsa_node->vcpu_id);
+			return -1;
+		}
+	}
+
+	/*
+	 * Move any stashed VMSAs back to their respective VMCBs and delete
+	 * those nodes.
+	 */
+	list_for_each_entry_safe(vmsa_node, q, vmsa_list, list) {
+		vcpu = kvm_get_vcpu_by_id(kvm, vmsa_node->vcpu_id);
+		svm = to_svm(vcpu);
+		svm->vmsa = vmsa_node->vmsa;
+		svm->ghcb = vmsa_node->ghcb;
+		svm->vmcb->control.ghcb_gpa = vmsa_node->ghcb_gpa;
+		svm->vcpu.arch.guest_state_protected = true;
+		svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+		svm->ghcb_sa = vmsa_node->ghcb_sa;
+		svm->ghcb_sa_len = vmsa_node->ghcb_sa_len;
+		svm->ghcb_sa_sync = vmsa_node->ghcb_sa_sync;
+		svm->ghcb_sa_free = vmsa_node->ghcb_sa_free;
+
+		list_del(&vmsa_node->list);
+		kfree(vmsa_node);
+	}
+
+	return 0;
+}
+
+static int create_vmsa_list(struct kvm *kvm,
+			    struct sev_info_migration_node *entry)
+{
+	int i;
+	const int num_vcpus = atomic_read(&kvm->online_vcpus);
+	struct vmsa_node *node;
+	struct kvm_vcpu *vcpu;
+	struct vcpu_svm *svm;
+
+	INIT_LIST_HEAD(&entry->vmsa_list);
+	for (i = 0; i < num_vcpus; ++i) {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			goto e_freelist;
+
+		vcpu = kvm->vcpus[i];
+		node->vcpu_id = vcpu->vcpu_id;
+
+		svm = to_svm(vcpu);
+		node->vmsa = svm->vmsa;
+		svm->vmsa = NULL;
+		node->ghcb = svm->ghcb;
+		svm->ghcb = NULL;
+		node->ghcb_gpa = svm->vmcb->control.ghcb_gpa;
+		node->ghcb_sa = svm->ghcb_sa;
+		svm->ghcb_sa = NULL;
+		node->ghcb_sa_len = svm->ghcb_sa_len;
+		svm->ghcb_sa_len = 0;
+		node->ghcb_sa_sync = svm->ghcb_sa_sync;
+		svm->ghcb_sa_sync = false;
+		node->ghcb_sa_free = svm->ghcb_sa_free;
+		svm->ghcb_sa_free = false;
+
+		list_add_tail(&node->list, &entry->vmsa_list);
+	}
+
+	return 0;
+
+e_freelist:
+	if (process_vmsa_list(kvm, &entry->vmsa_list))
+		WARN(1, "Unable to move VMSA list back to source VM. Guest is in a broken state now.");
+	return -1;
+}
+
 static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -1174,9 +1280,6 @@  static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (!sev_guest(kvm))
 		return -ENOTTY;
 
-	if (sev->es_active)
-		return -EPERM;
-
 	if (sev->info_token != 0)
 		return -EEXIST;
 
@@ -1196,8 +1299,19 @@  static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	INIT_LIST_HEAD(&entry->regions_list);
 	list_replace_init(&sev->regions_list, &entry->regions_list);
 
+	if (sev_es_guest(kvm)) {
+		/*
+		 * If this is an ES guest, we need to move each VMCB's VMSA into a
+		 * list for migration.
+		 */
+		entry->es_enabled = true;
+		entry->ap_jump_table = sev->ap_jump_table;
+		if (create_vmsa_list(kvm, entry))
+			goto e_listdel;
+	}
+
 	if (place_migration_node(entry))
-		goto e_listdel;
+		goto e_vmsadel;
 
 	token = entry->token;
 
@@ -1215,6 +1329,11 @@  static int sev_local_send(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	hash_del(&entry->hnode);
 	spin_unlock(&sev_info_migration_hash_lock);
 
+e_vmsadel:
+	if (sev_es_guest(kvm) && process_vmsa_list(kvm, &entry->vmsa_list))
+		WARN(1,
+		     "Unable to move VMSA list back to source VM. Guest is in a broken state now.");
+
 e_listdel:
 	list_replace_init(&entry->regions_list, &sev->regions_list);
 
@@ -1233,9 +1352,6 @@  static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (!sev_guest(kvm))
 		return -ENOTTY;
 
-	if (sev->es_active)
-		return -EPERM;
-
 	if (sev->handle != 0)
 		return -EPERM;
 
@@ -1254,6 +1370,14 @@  static int sev_local_receive(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
 	memcpy(&old_info, sev, sizeof(old_info));
 
+	if (entry->es_enabled) {
+		if (process_vmsa_list(kvm, &entry->vmsa_list))
+			goto err_unlock;
+
+		sev->es_active = true;
+		sev->ap_jump_table = entry->ap_jump_table;
+	}
+
 	/*
 	 * The source VM always frees @entry On the target we simply
 	 * mark the token as invalid to notify the source the sev info
@@ -2046,12 +2170,22 @@  void sev_vm_destroy(struct kvm *kvm)
 		__unregister_region_list_locked(kvm, &sev->regions_list);
 	}
 
-	/*
-	 * If userspace was terminated before unregistering the memory
-	 * regions then lets unpin all the registered memory.
-	 */
-	if (entry)
+	if (entry) {
+		/*
+		 * If there are any saved VMSAs, restore them so they can be
+		 * destroyed through the normal path.
+		 */
+		if (entry->es_enabled)
+			if (process_vmsa_list(kvm, &entry->vmsa_list))
+				WARN(1,
+				     "Unable to clean up vmsa_list");
+
+		/*
+		 * If userspace was terminated before unregistering the memory
+	 * regions then let's unpin all the registered memory.
+		 */
 		__unregister_region_list_locked(kvm, &entry->regions_list);
+	}
 
 	mutex_unlock(&kvm->lock);
 
@@ -2243,9 +2377,11 @@  void sev_free_vcpu(struct kvm_vcpu *vcpu)
 
 	svm = to_svm(vcpu);
 
-	if (vcpu->arch.guest_state_protected)
+	if (svm->ghcb && vcpu->arch.guest_state_protected)
 		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
-	__free_page(virt_to_page(svm->vmsa));
+
+	if (svm->vmsa)
+		__free_page(virt_to_page(svm->vmsa));
 
 	if (svm->ghcb_sa_free)
 		kfree(svm->ghcb_sa);