
[RFC,v2,32/32] x86: kvm: Pin the guest memory when SEV is active

Message ID 148846793743.2349.8478208161427437950.stgit@brijesh-build-machine (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu

Commit Message

Brijesh Singh March 2, 2017, 3:18 p.m. UTC
The SEV memory encryption engine uses a tweak such that two identical
plaintexts at different locations will have different ciphertexts.
So swapping or moving the ciphertexts of two pages will not result in
the plaintexts being swapped. Relocating (or migrating) the physical
backing pages for a SEV guest will require some additional steps. The
current SEV key management spec [1] does not provide commands to swap
or migrate (move) ciphertexts. For now we pin the memory allocated for
the SEV guest. In the future, when the SEV key management spec provides
commands to support page migration, we can update the KVM code to
remove the pinning logic without making any changes to userspace (qemu).
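
As an illustration (the exact tweak construction is AMD-internal, so
this is only a schematic XEX-style model): if the ciphertext of
plaintext p at physical address a is

    c = AES_K(p ⊕ t(a)) ⊕ t(a)

then reading the same ciphertext back at a different address b gives

    AES_K^{-1}(c ⊕ t(b)) ⊕ t(b) ≠ p    whenever t(a) ≠ t(b)

so ciphertext pages cannot simply be moved between physical addresses.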

The patch pins userspace memory when a new slot is created and unpins
the memory when the slot is removed.

[1] http://support.amd.com/TechDocs/55766_SEV-KM%20API_Spec.pdf

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/include/asm/kvm_host.h |    6 +++
 arch/x86/kvm/svm.c              |   93 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |    3 +
 3 files changed, 102 insertions(+)
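
sev_pin_memory() and sev_unpin_memory() are introduced by earlier
patches in this series and are not shown in this diff. A minimal sketch
of what helpers with this calling convention might look like, assuming
straightforward get_user_pages-based pinning (an assumption, not the
code from the series):

static struct page **sev_pin_memory(unsigned long uaddr, unsigned long ulen,
				    unsigned long *n)
{
	unsigned long first = uaddr >> PAGE_SHIFT;
	unsigned long last = (uaddr + ulen - 1) >> PAGE_SHIFT;
	unsigned long npages = last - first + 1;
	struct page **pages;
	long i, npinned;

	pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return NULL;

	/* pin the user pages so they can be neither swapped nor migrated */
	npinned = get_user_pages_fast(uaddr, npages, 1 /* write */, pages);
	if (npinned != (long)npages)
		goto err;

	*n = npages;
	return pages;

err:
	for (i = 0; i < npinned; i++)
		put_page(pages[i]);
	kfree(pages);
	return NULL;
}

static void sev_unpin_memory(struct page **pages, unsigned long npages)
{
	unsigned long i;

	for (i = 0; i < npages; i++)
		put_page(pages[i]);
	kfree(pages);
}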

Comments

Paolo Bonzini March 16, 2017, 10:38 a.m. UTC | #1
On 02/03/2017 16:18, Brijesh Singh wrote:
> The SEV memory encryption engine uses a tweak such that two identical
> plaintexts at different locations will have different ciphertexts.
> So swapping or moving the ciphertexts of two pages will not result in
> the plaintexts being swapped. Relocating (or migrating) the physical
> backing pages for a SEV guest will require some additional steps. The
> current SEV key management spec [1] does not provide commands to swap
> or migrate (move) ciphertexts. For now we pin the memory allocated for
> the SEV guest. In the future, when the SEV key management spec provides
> commands to support page migration, we can update the KVM code to
> remove the pinning logic without making any changes to userspace (qemu).
> 
> The patch pins userspace memory when a new slot is created and unpins
> the memory when the slot is removed.
> 
> [1] http://support.amd.com/TechDocs/55766_SEV-KM%20API_Spec.pdf

This is not enough, because memory can be hidden temporarily from the
guest and remapped later.  Think of a PCI BAR that is backed by RAM, or
also SMRAM.  The pinning must be kept even in that case.

You need to add a pair of KVM_MEMORY_ENCRYPT_OPs (one that doesn't map
to a PSP operation), such as KVM_REGISTER/UNREGISTER_ENCRYPTED_RAM.  In
QEMU you can use a RAMBlockNotifier to invoke the ioctls.

Paolo
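
A minimal sketch of the QEMU side of this suggestion, assuming the
hypothetical names from above (KVM_REGISTER_ENCRYPTED_RAM /
KVM_UNREGISTER_ENCRYPTED_RAM and a hypothetical region struct; none of
these exist yet, the final interface may differ):

/* hypothetical; the actual ioctl ABI is still to be defined */
struct kvm_encrypted_ram_region {
	__u64 addr;	/* userspace address of the range */
	__u64 size;	/* size in bytes */
};

static void sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
{
	struct kvm_encrypted_ram_region range = {
		.addr = (uintptr_t)host,
		.size = size,
	};

	/* ask KVM to keep this range pinned for the lifetime of the VM */
	kvm_vm_ioctl(kvm_state, KVM_REGISTER_ENCRYPTED_RAM, &range);
}

static void sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size)
{
	struct kvm_encrypted_ram_region range = {
		.addr = (uintptr_t)host,
		.size = size,
	};

	kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_ENCRYPTED_RAM, &range);
}

static RAMBlockNotifier sev_ram_notifier = {
	.ram_block_added = sev_ram_block_added,
	.ram_block_removed = sev_ram_block_removed,
};

/* during SEV guest setup: ram_block_notifier_add(&sev_ram_notifier); */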

> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    6 +++
>  arch/x86/kvm/svm.c              |   93 +++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c              |    3 +
>  3 files changed, 102 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fcc4710..9dc59f0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -723,6 +723,7 @@ struct kvm_sev_info {
>  	unsigned int handle;	/* firmware handle */
>  	unsigned int asid;	/* asid for this guest */
>  	int sev_fd;		/* SEV device fd */
> +	struct list_head pinned_memory_slot;
>  };
>  
>  struct kvm_arch {
> @@ -1043,6 +1044,11 @@ struct kvm_x86_ops {
>  	void (*setup_mce)(struct kvm_vcpu *vcpu);
>  
>  	int (*memory_encryption_op)(struct kvm *kvm, void __user *argp);
> +
> +	void (*prepare_memory_region)(struct kvm *kvm,
> +			struct kvm_memory_slot *memslot,
> +			const struct kvm_userspace_memory_region *mem,
> +			enum kvm_mr_change change);
>  };
>  
>  struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 13996d6..ab973f9 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -498,12 +498,21 @@ static inline bool gif_set(struct vcpu_svm *svm)
>  }
>  
>  /* Secure Encrypted Virtualization */
> +struct kvm_sev_pinned_memory_slot {
> +	struct list_head list;
> +	unsigned long npages;
> +	struct page **pages;
> +	unsigned long userspace_addr;
> +	short id;
> +};
> +
>  static unsigned int max_sev_asid;
>  static unsigned long *sev_asid_bitmap;
>  static void sev_deactivate_handle(struct kvm *kvm);
>  static void sev_decommission_handle(struct kvm *kvm);
>  static int sev_asid_new(void);
>  static void sev_asid_free(int asid);
> +static void sev_unpin_memory(struct page **pages, unsigned long npages);
>  #define __sev_page_pa(x) ((page_to_pfn(x) << PAGE_SHIFT) | sme_me_mask)
>  
>  static bool kvm_sev_enabled(void)
> @@ -1544,9 +1553,25 @@ static inline int avic_free_vm_id(int id)
>  
>  static void sev_vm_destroy(struct kvm *kvm)
>  {
> +	struct list_head *pos, *q;
> +	struct kvm_sev_pinned_memory_slot *pinned_slot;
> +	struct list_head *head = &kvm->arch.sev_info.pinned_memory_slot;
> +
>  	if (!sev_guest(kvm))
>  		return;
>  
> +	/* if guest memory is pinned then unpin it now */
> +	if (!list_empty(head)) {
> +		list_for_each_safe(pos, q, head) {
> +			pinned_slot = list_entry(pos,
> +				struct kvm_sev_pinned_memory_slot, list);
> +			sev_unpin_memory(pinned_slot->pages,
> +					pinned_slot->npages);
> +			list_del(pos);
> +			kfree(pinned_slot);
> +		}
> +	}
> +
>  	/* release the firmware resources */
>  	sev_deactivate_handle(kvm);
>  	sev_decommission_handle(kvm);
> @@ -5663,6 +5688,8 @@ static int sev_pre_start(struct kvm *kvm, int *asid)
>  		}
>  		*asid = ret;
>  		ret = 0;
> +
> +		INIT_LIST_HEAD(&kvm->arch.sev_info.pinned_memory_slot);
>  	}
>  
>  	return ret;
> @@ -6189,6 +6216,71 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	return ret;
>  }
>  
> +static struct kvm_sev_pinned_memory_slot *sev_find_pinned_memory_slot(
> +		struct kvm *kvm, struct kvm_memory_slot *slot)
> +{
> +	struct kvm_sev_pinned_memory_slot *i;
> +	struct list_head *head = &kvm->arch.sev_info.pinned_memory_slot;
> +
> +	list_for_each_entry(i, head, list) {
> +		if (i->userspace_addr == slot->userspace_addr &&
> +			i->id == slot->id)
> +			return i;
> +	}
> +
> +	return NULL;
> +}
> +
> +static void amd_prepare_memory_region(struct kvm *kvm,
> +				struct kvm_memory_slot *memslot,
> +				const struct kvm_userspace_memory_region *mem,
> +				enum kvm_mr_change change)
> +{
> +	struct kvm_sev_pinned_memory_slot *pinned_slot;
> +	struct list_head *head = &kvm->arch.sev_info.pinned_memory_slot;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	if (!sev_guest(kvm))
> +		goto unlock;
> +
> +	if (change == KVM_MR_CREATE) {
> +
> +		if (!mem->memory_size)
> +			goto unlock;
> +
> +		pinned_slot = kmalloc(sizeof(*pinned_slot), GFP_KERNEL);
> +		if (pinned_slot == NULL)
> +			goto unlock;
> +
> +		pinned_slot->pages = sev_pin_memory(mem->userspace_addr,
> +				mem->memory_size, &pinned_slot->npages);
> +		if (pinned_slot->pages == NULL) {
> +			kfree(pinned_slot);
> +			goto unlock;
> +		}
> +
> +		sev_clflush_pages(pinned_slot->pages, pinned_slot->npages);
> +
> +		pinned_slot->id = memslot->id;
> +		pinned_slot->userspace_addr = mem->userspace_addr;
> +		list_add_tail(&pinned_slot->list, head);
> +
> +	} else if (change == KVM_MR_DELETE) {
> +
> +		pinned_slot = sev_find_pinned_memory_slot(kvm, memslot);
> +		if (!pinned_slot)
> +			goto unlock;
> +
> +		sev_unpin_memory(pinned_slot->pages, pinned_slot->npages);
> +		list_del(&pinned_slot->list);
> +		kfree(pinned_slot);
> +	}
> +
> +unlock:
> +	mutex_unlock(&kvm->lock);
> +}
> +
>  static int amd_memory_encryption_cmd(struct kvm *kvm, void __user *argp)
>  {
>  	int r = -ENOTTY;
> @@ -6355,6 +6447,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>  	.update_pi_irte = svm_update_pi_irte,
>  
>  	.memory_encryption_op = amd_memory_encryption_cmd,
> +	.prepare_memory_region = amd_prepare_memory_region,
>  };
>  
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6a737e9..e05069d 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8195,6 +8195,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  				const struct kvm_userspace_memory_region *mem,
>  				enum kvm_mr_change change)
>  {
> +	if (kvm_x86_ops->prepare_memory_region)
> +		kvm_x86_ops->prepare_memory_region(kvm, memslot, mem, change);
> +
>  	return 0;
>  }
>  
>
Brijesh Singh March 16, 2017, 6:17 p.m. UTC | #2
On 03/16/2017 05:38 AM, Paolo Bonzini wrote:
>
>
> On 02/03/2017 16:18, Brijesh Singh wrote:
>> The SEV memory encryption engine uses a tweak such that two identical
>> plaintexts at different locations will have different ciphertexts.
>> So swapping or moving the ciphertexts of two pages will not result in
>> the plaintexts being swapped. Relocating (or migrating) the physical
>> backing pages for a SEV guest will require some additional steps. The
>> current SEV key management spec [1] does not provide commands to swap
>> or migrate (move) ciphertexts. For now we pin the memory allocated for
>> the SEV guest. In the future, when the SEV key management spec provides
>> commands to support page migration, we can update the KVM code to
>> remove the pinning logic without making any changes to userspace (qemu).
>>
>> The patch pins userspace memory when a new slot is created and unpins
>> the memory when the slot is removed.
>>
>> [1] http://support.amd.com/TechDocs/55766_SEV-KM%20API_Spec.pdf
>
> This is not enough, because memory can be hidden temporarily from the
> guest and remapped later.  Think of a PCI BAR that is backed by RAM, or
> also SMRAM.  The pinning must be kept even in that case.
>
> You need to add a pair of KVM_MEMORY_ENCRYPT_OPs (one that doesn't map
> to a PSP operation), such as KVM_REGISTER/UNREGISTER_ENCRYPTED_RAM.  In
> QEMU you can use a RAMBlockNotifier to invoke the ioctls.
>

I was hoping to avoid adding a new ioctl, but I see your point. Will add a pair of ioctls
and use RAMBlockNotifier to invoke those ioctls.

-Brijesh
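
A rough sketch of what the kernel side of such a register/unregister
pair could look like, reusing this patch's pinned-slot list and the
same hypothetical region struct as in the QEMU sketch above (command
names are hypothetical; the final interface may differ):

static int sev_register_encrypted_ram(struct kvm *kvm,
				      struct kvm_encrypted_ram_region *range)
{
	struct kvm_sev_pinned_memory_slot *slot;

	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
	if (!slot)
		return -ENOMEM;

	slot->userspace_addr = range->addr;
	slot->pages = sev_pin_memory(range->addr, range->size, &slot->npages);
	if (!slot->pages) {
		kfree(slot);
		return -ENOMEM;
	}

	/* keep the range pinned until it is explicitly unregistered */
	mutex_lock(&kvm->lock);
	list_add_tail(&slot->list, &kvm->arch.sev_info.pinned_memory_slot);
	mutex_unlock(&kvm->lock);

	return 0;
}

static int sev_unregister_encrypted_ram(struct kvm *kvm,
					struct kvm_encrypted_ram_region *range)
{
	struct kvm_sev_pinned_memory_slot *slot, *tmp;
	int ret = -ENOENT;

	mutex_lock(&kvm->lock);
	list_for_each_entry_safe(slot, tmp,
				 &kvm->arch.sev_info.pinned_memory_slot, list) {
		if (slot->userspace_addr != range->addr)
			continue;
		sev_unpin_memory(slot->pages, slot->npages);
		list_del(&slot->list);
		kfree(slot);
		ret = 0;
		break;
	}
	mutex_unlock(&kvm->lock);

	return ret;
}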

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcc4710..9dc59f0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -723,6 +723,7 @@ struct kvm_sev_info {
 	unsigned int handle;	/* firmware handle */
 	unsigned int asid;	/* asid for this guest */
 	int sev_fd;		/* SEV device fd */
+	struct list_head pinned_memory_slot;
 };
 
 struct kvm_arch {
@@ -1043,6 +1044,11 @@ struct kvm_x86_ops {
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
 
 	int (*memory_encryption_op)(struct kvm *kvm, void __user *argp);
+
+	void (*prepare_memory_region)(struct kvm *kvm,
+			struct kvm_memory_slot *memslot,
+			const struct kvm_userspace_memory_region *mem,
+			enum kvm_mr_change change);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 13996d6..ab973f9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -498,12 +498,21 @@ static inline bool gif_set(struct vcpu_svm *svm)
 }
 
 /* Secure Encrypted Virtualization */
+struct kvm_sev_pinned_memory_slot {
+	struct list_head list;
+	unsigned long npages;
+	struct page **pages;
+	unsigned long userspace_addr;
+	short id;
+};
+
 static unsigned int max_sev_asid;
 static unsigned long *sev_asid_bitmap;
 static void sev_deactivate_handle(struct kvm *kvm);
 static void sev_decommission_handle(struct kvm *kvm);
 static int sev_asid_new(void);
 static void sev_asid_free(int asid);
+static void sev_unpin_memory(struct page **pages, unsigned long npages);
 #define __sev_page_pa(x) ((page_to_pfn(x) << PAGE_SHIFT) | sme_me_mask)
 
 static bool kvm_sev_enabled(void)
@@ -1544,9 +1553,25 @@ static inline int avic_free_vm_id(int id)
 
 static void sev_vm_destroy(struct kvm *kvm)
 {
+	struct list_head *pos, *q;
+	struct kvm_sev_pinned_memory_slot *pinned_slot;
+	struct list_head *head = &kvm->arch.sev_info.pinned_memory_slot;
+
 	if (!sev_guest(kvm))
 		return;
 
+	/* if guest memory is pinned then unpin it now */
+	if (!list_empty(head)) {
+		list_for_each_safe(pos, q, head) {
+			pinned_slot = list_entry(pos,
+				struct kvm_sev_pinned_memory_slot, list);
+			sev_unpin_memory(pinned_slot->pages,
+					pinned_slot->npages);
+			list_del(pos);
+			kfree(pinned_slot);
+		}
+	}
+
 	/* release the firmware resources */
 	sev_deactivate_handle(kvm);
 	sev_decommission_handle(kvm);
@@ -5663,6 +5688,8 @@ static int sev_pre_start(struct kvm *kvm, int *asid)
 		}
 		*asid = ret;
 		ret = 0;
+
+		INIT_LIST_HEAD(&kvm->arch.sev_info.pinned_memory_slot);
 	}
 
 	return ret;
@@ -6189,6 +6216,71 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	return ret;
 }
 
+static struct kvm_sev_pinned_memory_slot *sev_find_pinned_memory_slot(
+		struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	struct kvm_sev_pinned_memory_slot *i;
+	struct list_head *head = &kvm->arch.sev_info.pinned_memory_slot;
+
+	list_for_each_entry(i, head, list) {
+		if (i->userspace_addr == slot->userspace_addr &&
+			i->id == slot->id)
+			return i;
+	}
+
+	return NULL;
+}
+
+static void amd_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
+				const struct kvm_userspace_memory_region *mem,
+				enum kvm_mr_change change)
+{
+	struct kvm_sev_pinned_memory_slot *pinned_slot;
+	struct list_head *head = &kvm->arch.sev_info.pinned_memory_slot;
+
+	mutex_lock(&kvm->lock);
+
+	if (!sev_guest(kvm))
+		goto unlock;
+
+	if (change == KVM_MR_CREATE) {
+
+		if (!mem->memory_size)
+			goto unlock;
+
+		pinned_slot = kmalloc(sizeof(*pinned_slot), GFP_KERNEL);
+		if (pinned_slot == NULL)
+			goto unlock;
+
+		pinned_slot->pages = sev_pin_memory(mem->userspace_addr,
+				mem->memory_size, &pinned_slot->npages);
+		if (pinned_slot->pages == NULL) {
+			kfree(pinned_slot);
+			goto unlock;
+		}
+
+		sev_clflush_pages(pinned_slot->pages, pinned_slot->npages);
+
+		pinned_slot->id = memslot->id;
+		pinned_slot->userspace_addr = mem->userspace_addr;
+		list_add_tail(&pinned_slot->list, head);
+
+	} else if (change == KVM_MR_DELETE) {
+
+		pinned_slot = sev_find_pinned_memory_slot(kvm, memslot);
+		if (!pinned_slot)
+			goto unlock;
+
+		sev_unpin_memory(pinned_slot->pages, pinned_slot->npages);
+		list_del(&pinned_slot->list);
+		kfree(pinned_slot);
+	}
+
+unlock:
+	mutex_unlock(&kvm->lock);
+}
+
 static int amd_memory_encryption_cmd(struct kvm *kvm, void __user *argp)
 {
 	int r = -ENOTTY;
@@ -6355,6 +6447,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.update_pi_irte = svm_update_pi_irte,
 
 	.memory_encryption_op = amd_memory_encryption_cmd,
+	.prepare_memory_region = amd_prepare_memory_region,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6a737e9..e05069d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8195,6 +8195,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				const struct kvm_userspace_memory_region *mem,
 				enum kvm_mr_change change)
 {
+	if (kvm_x86_ops->prepare_memory_region)
+		kvm_x86_ops->prepare_memory_region(kvm, memslot, mem, change);
+
 	return 0;
 }