diff mbox

[RFC,Part2,v3,22/26] KVM: SVM: Pin guest memory when SEV is active

Message ID 20170724200303.12197-23-brijesh.singh@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Brijesh Singh July 24, 2017, 8:02 p.m. UTC
The SEV memory encryption engine uses a tweak such that two identical
plaintexts at different location will have a different ciphertexts.
So swapping or moving ciphertexts of two pages will not result in
plaintexts being swapped. Relocating (or migrating) a physical backing
pages for SEV guest will require some additional steps. The current SEV
key management spec does not provide commands to swap or migrate (move)
ciphertexts. For now, we pin the guest memory registered through
KVM_MEMORY_ENCRYPT_REGISTER_RAM ioctl.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/include/asm/kvm_host.h |   1 +
 arch/x86/kvm/svm.c              | 113 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)

Comments

Borislav Petkov Sept. 14, 2017, 2 p.m. UTC | #1
On Mon, Jul 24, 2017 at 03:02:59PM -0500, Brijesh Singh wrote:
> The SEV memory encryption engine uses a tweak such that two identical
> plaintexts at different location will have a different ciphertexts.

plaintexts or plaintext pages?		also, s/a //

> So swapping or moving ciphertexts of two pages will not result in
> plaintexts being swapped. Relocating (or migrating) a physical backing

s/a //

> pages for SEV guest will require some additional steps. The current SEV

"for a SEV guest"

> key management spec does not provide commands to swap or migrate (move)
> ciphertexts. For now, we pin the guest memory registered through
> KVM_MEMORY_ENCRYPT_REGISTER_RAM ioctl.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/include/asm/kvm_host.h |   1 +
>  arch/x86/kvm/svm.c              | 113 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 114 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 150177e..a91aadf 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -747,6 +747,7 @@ struct kvm_sev_info {
>  	unsigned int handle;	/* firmware handle */
>  	unsigned int asid;	/* asid for this guest */
>  	int sev_fd;		/* SEV device fd */
> +	struct list_head ram_list; /* list of registered ram */

regions_list I guess.

>  };
>  
>  struct kvm_arch {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 75dcaa9..cdb1cf3 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -333,8 +333,19 @@ static int sev_asid_new(void);
>  static void sev_asid_free(int asid);
>  static void sev_deactivate_handle(struct kvm *kvm, int *error);
>  static void sev_decommission_handle(struct kvm *kvm, int *error);
> +static void sev_unpin_memory(struct page **pages, unsigned long npages);

Unneeded.

> +
>  #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
>  
> +struct kvm_sev_pin_ram {

sev_pinned_region

> +	struct list_head list;
> +	unsigned long npages;
> +	struct page **pages;
> +	struct kvm_memory_encrypt_ram userspace;

That member would need a comment what it is.

> +};
> +
> +static void __mem_encrypt_unregister_ram(struct kvm_sev_pin_ram *ram);

Move code so that you don't need that one.

> +
>  static bool svm_sev_enabled(void)
>  {
>  	return !!max_sev_asid;
> @@ -385,6 +396,11 @@ static inline void sev_set_fd(struct kvm *kvm, int fd)
>  	to_sev_info(kvm)->sev_fd = fd;
>  }
>  
> +static inline struct list_head *sev_get_ram_list(struct kvm *kvm)
> +{
> +	return &to_sev_info(kvm)->ram_list;
> +}
> +
>  static inline void mark_all_dirty(struct vmcb *vmcb)
>  {
>  	vmcb->control.clean = 0;
> @@ -1566,10 +1582,24 @@ static void sev_firmware_uninit(void)
>  static void sev_vm_destroy(struct kvm *kvm)
>  {
>  	int state, error;
> +	struct list_head *pos, *q;
> +	struct kvm_sev_pin_ram *ram;
> +	struct list_head *head = sev_get_ram_list(kvm);

Please sort function local variables declaration in a reverse christmas
tree order:

	<type> longest_variable_name;
	<type> shorter_var_name;
	<type> even_shorter;
	<type> i;

>  
>  	if (!sev_guest(kvm))
>  		return;
>  
> +	/*
> +	 * if userspace was terminated before unregistering the memory region
> +	 * then lets unpin all the registered memory.
> +	 */
> +	if (!list_empty(head)) {
> +		list_for_each_safe(pos, q, head) {
> +			ram = list_entry(pos, struct kvm_sev_pin_ram, list);
> +			__mem_encrypt_unregister_ram(ram);

You don't need the local "ram" varible here:

			__mem_encrypt_unregister_ram(list_entry(pos, struct kvm_sev_pin_ram, list));

> +		}
> +	}
> +
>  	/* release the firmware resources for this guest */
>  	if (sev_get_handle(kvm)) {
>  		sev_deactivate_handle(kvm, &error);
> @@ -5640,6 +5670,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	sev_set_active(kvm);
>  	sev_set_asid(kvm, asid);
>  	sev_set_fd(kvm, argp->sev_fd);
> +	INIT_LIST_HEAD(sev_get_ram_list(kvm));
>  	ret = 0;
>  e_err:
>  	fdput(f);
> @@ -6437,6 +6468,86 @@ static int svm_memory_encryption_op(struct kvm *kvm, void __user *argp)
>  	return r;
>  }
>  
> +static int mem_encrypt_register_ram(struct kvm *kvm,
> +				    struct kvm_memory_encrypt_ram *ram)
> +{

Please call that arg "regions" or so. "ram" is strange. In the other
functions too.

> +	struct list_head *head = sev_get_ram_list(kvm);
> +	struct kvm_sev_pin_ram *pin_ram;
> +
> +	if (!sev_guest(kvm))
> +		return -ENOTTY;
> +
> +	pin_ram = kzalloc(sizeof(*pin_ram), GFP_KERNEL);
> +	if (!pin_ram)
> +		return -ENOMEM;
> +
> +	pin_ram->pages = sev_pin_memory(ram->address, ram->size,
> +					&pin_ram->npages, 1);

Let it stick out.

> +	if (!pin_ram->pages)
> +		goto e_free;
> +
> +	/*
> +	 * Guest may change the memory encryption attribute from C=0 -> C=1

"The guest"

> +	 * for this memory range. Lets make sure caches are flushed to ensure
> +	 * that guest data gets written into memory with correct C-bit.
> +	 */
> +	sev_clflush_pages(pin_ram->pages, pin_ram->npages);
> +
> +	pin_ram->userspace.address = ram->address;
> +	pin_ram->userspace.size = ram->size;
> +	list_add_tail(&pin_ram->list, head);
> +	return 0;

<---- newline here.

> +e_free:
> +	kfree(pin_ram);
> +	return 1;
> +}
> +
> +static struct kvm_sev_pin_ram *sev_find_pinned_ram(struct kvm *kvm,
> +					struct kvm_memory_encrypt_ram *ram)

So this function signature is almost impossible to read: you have "kvm"
"sev" "pin" "ram" and those long structure names.

Now look how something like this:

static struct regions_list *
sev_find_pinned_memory(struct kvm *kvm, struct enc_range *range)

tells you exactly what the function does.

> +{
> +	struct list_head *head = sev_get_ram_list(kvm);
> +	struct kvm_sev_pin_ram *i;
> +
> +	list_for_each_entry(i, head, list) {
> +		if (i->userspace.address == ram->address &&
> +			i->userspace.size == ram->size)

		if (i->usr.addr == reg->addr &&
		    i->usr.size == reg->size)

reads much better to me.

> +			return i;
> +	}
> +
> +	return NULL;
> +}
> +
> +static void __mem_encrypt_unregister_ram(struct kvm_sev_pin_ram *ram)
> +{
> +	/*
> +	 * Guest may have changed the memory encryption attribute from

"The guest"

> +	 * C=0 -> C=1. Lets make sure caches are flushed to ensure in data

Both comments talk about the 0 -> 1 case for the C-bit. What about the
reverse: 1->0? Do we not flush there or we don't have cases where a
guest doesn't decrypt its memory?

> +	 * gets written into memory with correct C-bit.
> +	 */
> +	sev_clflush_pages(ram->pages, ram->npages);
> +
> +	sev_unpin_memory(ram->pages, ram->npages);
> +	list_del(&ram->list);
> +	kfree(ram);
> +}
> +
> +static int mem_encrypt_unregister_ram(struct kvm *kvm,
> +				      struct kvm_memory_encrypt_ram *ram)
> +{
> +	struct kvm_sev_pin_ram *pinned_ram;
> +
> +	if (!sev_guest(kvm))
> +		return -ENOTTY;
> +
> +	pinned_ram = sev_find_pinned_ram(kvm, ram);
> +	if (!pinned_ram)
> +		return -EINVAL;
> +
> +	__mem_encrypt_unregister_ram(pinned_ram);
> +
> +	return 0;
> +}
> +
>  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>  	.cpu_has_kvm_support = has_svm,
>  	.disabled_by_bios = is_disabled,
> @@ -6551,6 +6662,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>  	.setup_mce = svm_setup_mce,
>  
>  	.memory_encryption_op = svm_memory_encryption_op,
> +	.memory_encryption_register_ram = mem_encrypt_register_ram,
> +	.memory_encryption_unregister_ram = mem_encrypt_unregister_ram,

Names are too long. mem_encrypt_reg_memory or so I guess. In general,
choose a prefix and stick with it. mem_enc, mem_encrypt, mem_crypt...
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 150177e..a91aadf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -747,6 +747,7 @@  struct kvm_sev_info {
 	unsigned int handle;	/* firmware handle */
 	unsigned int asid;	/* asid for this guest */
 	int sev_fd;		/* SEV device fd */
+	struct list_head ram_list; /* list of registered ram */
 };
 
 struct kvm_arch {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 75dcaa9..cdb1cf3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -333,8 +333,19 @@  static int sev_asid_new(void);
 static void sev_asid_free(int asid);
 static void sev_deactivate_handle(struct kvm *kvm, int *error);
 static void sev_decommission_handle(struct kvm *kvm, int *error);
+static void sev_unpin_memory(struct page **pages, unsigned long npages);
+
 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
 
+struct kvm_sev_pin_ram {
+	struct list_head list;
+	unsigned long npages;
+	struct page **pages;
+	struct kvm_memory_encrypt_ram userspace;
+};
+
+static void __mem_encrypt_unregister_ram(struct kvm_sev_pin_ram *ram);
+
 static bool svm_sev_enabled(void)
 {
 	return !!max_sev_asid;
@@ -385,6 +396,11 @@  static inline void sev_set_fd(struct kvm *kvm, int fd)
 	to_sev_info(kvm)->sev_fd = fd;
 }
 
+static inline struct list_head *sev_get_ram_list(struct kvm *kvm)
+{
+	return &to_sev_info(kvm)->ram_list;
+}
+
 static inline void mark_all_dirty(struct vmcb *vmcb)
 {
 	vmcb->control.clean = 0;
@@ -1566,10 +1582,24 @@  static void sev_firmware_uninit(void)
 static void sev_vm_destroy(struct kvm *kvm)
 {
 	int state, error;
+	struct list_head *pos, *q;
+	struct kvm_sev_pin_ram *ram;
+	struct list_head *head = sev_get_ram_list(kvm);
 
 	if (!sev_guest(kvm))
 		return;
 
+	/*
+	 * if userspace was terminated before unregistering the memory region
+	 * then lets unpin all the registered memory.
+	 */
+	if (!list_empty(head)) {
+		list_for_each_safe(pos, q, head) {
+			ram = list_entry(pos, struct kvm_sev_pin_ram, list);
+			__mem_encrypt_unregister_ram(ram);
+		}
+	}
+
 	/* release the firmware resources for this guest */
 	if (sev_get_handle(kvm)) {
 		sev_deactivate_handle(kvm, &error);
@@ -5640,6 +5670,7 @@  static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	sev_set_active(kvm);
 	sev_set_asid(kvm, asid);
 	sev_set_fd(kvm, argp->sev_fd);
+	INIT_LIST_HEAD(sev_get_ram_list(kvm));
 	ret = 0;
 e_err:
 	fdput(f);
@@ -6437,6 +6468,86 @@  static int svm_memory_encryption_op(struct kvm *kvm, void __user *argp)
 	return r;
 }
 
+static int mem_encrypt_register_ram(struct kvm *kvm,
+				    struct kvm_memory_encrypt_ram *ram)
+{
+	struct list_head *head = sev_get_ram_list(kvm);
+	struct kvm_sev_pin_ram *pin_ram;
+
+	if (!sev_guest(kvm))
+		return -ENOTTY;
+
+	pin_ram = kzalloc(sizeof(*pin_ram), GFP_KERNEL);
+	if (!pin_ram)
+		return -ENOMEM;
+
+	pin_ram->pages = sev_pin_memory(ram->address, ram->size,
+					&pin_ram->npages, 1);
+	if (!pin_ram->pages)
+		goto e_free;
+
+	/*
+	 * Guest may change the memory encryption attribute from C=0 -> C=1
+	 * for this memory range. Lets make sure caches are flushed to ensure
+	 * that guest data gets written into memory with correct C-bit.
+	 */
+	sev_clflush_pages(pin_ram->pages, pin_ram->npages);
+
+	pin_ram->userspace.address = ram->address;
+	pin_ram->userspace.size = ram->size;
+	list_add_tail(&pin_ram->list, head);
+	return 0;
+e_free:
+	kfree(pin_ram);
+	return 1;
+}
+
+static struct kvm_sev_pin_ram *sev_find_pinned_ram(struct kvm *kvm,
+					struct kvm_memory_encrypt_ram *ram)
+{
+	struct list_head *head = sev_get_ram_list(kvm);
+	struct kvm_sev_pin_ram *i;
+
+	list_for_each_entry(i, head, list) {
+		if (i->userspace.address == ram->address &&
+			i->userspace.size == ram->size)
+			return i;
+	}
+
+	return NULL;
+}
+
+static void __mem_encrypt_unregister_ram(struct kvm_sev_pin_ram *ram)
+{
+	/*
+	 * Guest may have changed the memory encryption attribute from
+	 * C=0 -> C=1. Lets make sure caches are flushed to ensure in data
+	 * gets written into memory with correct C-bit.
+	 */
+	sev_clflush_pages(ram->pages, ram->npages);
+
+	sev_unpin_memory(ram->pages, ram->npages);
+	list_del(&ram->list);
+	kfree(ram);
+}
+
+static int mem_encrypt_unregister_ram(struct kvm *kvm,
+				      struct kvm_memory_encrypt_ram *ram)
+{
+	struct kvm_sev_pin_ram *pinned_ram;
+
+	if (!sev_guest(kvm))
+		return -ENOTTY;
+
+	pinned_ram = sev_find_pinned_ram(kvm, ram);
+	if (!pinned_ram)
+		return -EINVAL;
+
+	__mem_encrypt_unregister_ram(pinned_ram);
+
+	return 0;
+}
+
 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -6551,6 +6662,8 @@  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.setup_mce = svm_setup_mce,
 
 	.memory_encryption_op = svm_memory_encryption_op,
+	.memory_encryption_register_ram = mem_encrypt_register_ram,
+	.memory_encryption_unregister_ram = mem_encrypt_unregister_ram,
 };
 
 static int __init svm_init(void)