diff mbox series

[RFCv2,11/16] KVM: Protected memory extension

Message ID 20201020061859.18385-12-kirill.shutemov@linux.intel.com
State New
Headers show
Series KVM protected memory extension | expand

Commit Message

Kirill A. Shutemov Oct. 20, 2020, 6:18 a.m. UTC
Add infrastructure that handles protected memory extension.

Arch-specific code has to provide hypercalls and define non-zero
VM_KVM_PROTECTED.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/kvm_host.h |  4 +++
 virt/kvm/Kconfig         |  3 ++
 virt/kvm/kvm_main.c      | 68 ++++++++++++++++++++++++++++++++++++++
 virt/lib/Makefile        |  1 +
 virt/lib/mem_protected.c | 71 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 147 insertions(+)
 create mode 100644 virt/lib/mem_protected.c

Comments

Peter Zijlstra Oct. 20, 2020, 7:17 a.m. UTC | #1
On Tue, Oct 20, 2020 at 09:18:54AM +0300, Kirill A. Shutemov wrote:
> +int __kvm_protect_memory(unsigned long start, unsigned long end, bool protect)
> +{
> +	struct mm_struct *mm = current->mm;
> +	struct vm_area_struct *vma, *prev;
> +	int ret;
> +
> +	if (mmap_write_lock_killable(mm))
> +		return -EINTR;
> +
> +	ret = -ENOMEM;
> +	vma = find_vma(current->mm, start);
> +	if (!vma)
> +		goto out;
> +
> +	ret = -EINVAL;
> +	if (vma->vm_start > start)
> +		goto out;
> +
> +	if (start > vma->vm_start)
> +		prev = vma;
> +	else
> +		prev = vma->vm_prev;
> +
> +	ret = 0;
> +	while (true) {
> +		unsigned long newflags, tmp;
> +
> +		tmp = vma->vm_end;
> +		if (tmp > end)
> +			tmp = end;
> +
> +		newflags = vma->vm_flags;
> +		if (protect)
> +			newflags |= VM_KVM_PROTECTED;
> +		else
> +			newflags &= ~VM_KVM_PROTECTED;
> +
> +		/* The VMA has been handled as part of other memslot */
> +		if (newflags == vma->vm_flags)
> +			goto next;
> +
> +		ret = mprotect_fixup(vma, &prev, start, tmp, newflags);
> +		if (ret)
> +			goto out;
> +
> +next:
> +		start = tmp;
> +		if (start < prev->vm_end)
> +			start = prev->vm_end;
> +
> +		if (start >= end)
> +			goto out;
> +
> +		vma = prev->vm_next;
> +		if (!vma || vma->vm_start != start) {
> +			ret = -ENOMEM;
> +			goto out;
> +		}
> +	}
> +out:
> +	mmap_write_unlock(mm);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(__kvm_protect_memory);

Since migration will be disabled after this; should the above not (at
the very least) force compaction before proceeding to lock the pages in?
Kirill A. Shutemov Oct. 20, 2020, 12:55 p.m. UTC | #2
On Tue, Oct 20, 2020 at 09:17:01AM +0200, Peter Zijlstra wrote:
> On Tue, Oct 20, 2020 at 09:18:54AM +0300, Kirill A. Shutemov wrote:
> > +int __kvm_protect_memory(unsigned long start, unsigned long end, bool protect)
> > +{
> > +	struct mm_struct *mm = current->mm;
> > +	struct vm_area_struct *vma, *prev;
> > +	int ret;
> > +
> > +	if (mmap_write_lock_killable(mm))
> > +		return -EINTR;
> > +
> > +	ret = -ENOMEM;
> > +	vma = find_vma(current->mm, start);
> > +	if (!vma)
> > +		goto out;
> > +
> > +	ret = -EINVAL;
> > +	if (vma->vm_start > start)
> > +		goto out;
> > +
> > +	if (start > vma->vm_start)
> > +		prev = vma;
> > +	else
> > +		prev = vma->vm_prev;
> > +
> > +	ret = 0;
> > +	while (true) {
> > +		unsigned long newflags, tmp;
> > +
> > +		tmp = vma->vm_end;
> > +		if (tmp > end)
> > +			tmp = end;
> > +
> > +		newflags = vma->vm_flags;
> > +		if (protect)
> > +			newflags |= VM_KVM_PROTECTED;
> > +		else
> > +			newflags &= ~VM_KVM_PROTECTED;
> > +
> > +		/* The VMA has been handled as part of other memslot */
> > +		if (newflags == vma->vm_flags)
> > +			goto next;
> > +
> > +		ret = mprotect_fixup(vma, &prev, start, tmp, newflags);
> > +		if (ret)
> > +			goto out;
> > +
> > +next:
> > +		start = tmp;
> > +		if (start < prev->vm_end)
> > +			start = prev->vm_end;
> > +
> > +		if (start >= end)
> > +			goto out;
> > +
> > +		vma = prev->vm_next;
> > +		if (!vma || vma->vm_start != start) {
> > +			ret = -ENOMEM;
> > +			goto out;
> > +		}
> > +	}
> > +out:
> > +	mmap_write_unlock(mm);
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(__kvm_protect_memory);
> 
> Since migration will be disabled after this; should the above not (at
> the very least) force compaction before proceeding to lock the pages in?

Migration has to be implemented instead, before we hit upstream.

BTW, VMs with direct device assignment pins all guest memory today. So
it's not something new in the virtualization world.
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 380a64613880..6655e8da4555 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -701,6 +701,10 @@  void kvm_arch_flush_shadow_all(struct kvm *kvm);
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot);
 
+int kvm_protect_all_memory(struct kvm *kvm);
+int kvm_protect_memory(struct kvm *kvm,
+		       unsigned long gfn, unsigned long npages, bool protect);
+
 int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
 			    struct page **pages, int nr_pages);
 
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 1c37ccd5d402..50d7422386aa 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -63,3 +63,6 @@  config HAVE_KVM_NO_POLL
 
 config KVM_XFER_TO_GUEST_WORK
        bool
+
+config HAVE_KVM_PROTECTED_MEMORY
+       bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 125db5a73e10..4c008c7b4974 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -154,6 +154,8 @@  static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
 static unsigned long long kvm_createvm_count;
 static unsigned long long kvm_active_vms;
 
+int __kvm_protect_memory(unsigned long start, unsigned long end, bool protect);
+
 __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 						   unsigned long start, unsigned long end)
 {
@@ -1371,6 +1373,15 @@  int __kvm_set_memory_region(struct kvm *kvm,
 	if (r)
 		goto out_bitmap;
 
+	if (IS_ENABLED(CONFIG_HAVE_KVM_PROTECTED_MEMORY) &&
+	    mem->memory_size && kvm->mem_protected) {
+		r = __kvm_protect_memory(new.userspace_addr,
+					 new.userspace_addr + new.npages * PAGE_SIZE,
+					 true);
+		if (r)
+			goto out_bitmap;
+	}
+
 	if (old.dirty_bitmap && !new.dirty_bitmap)
 		kvm_destroy_dirty_bitmap(&old);
 	return 0;
@@ -2720,6 +2731,63 @@  void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
+int kvm_protect_memory(struct kvm *kvm,
+		       unsigned long gfn, unsigned long npages, bool protect)
+{
+	struct kvm_memory_slot *memslot;
+	unsigned long start, end;
+	gfn_t numpages;
+
+	if (!IS_ENABLED(CONFIG_HAVE_KVM_PROTECTED_MEMORY))
+		return -KVM_ENOSYS;
+
+	if (!npages)
+		return 0;
+
+	memslot = gfn_to_memslot(kvm, gfn);
+	/* Not backed by memory. It's okay. */
+	if (!memslot)
+		return 0;
+
+	start = gfn_to_hva_many(memslot, gfn, &numpages);
+	end = start + npages * PAGE_SIZE;
+
+	/* XXX: Share range across memory slots? */
+	if (WARN_ON(numpages < npages))
+		return -EINVAL;
+
+	return __kvm_protect_memory(start, end, protect);
+}
+EXPORT_SYMBOL_GPL(kvm_protect_memory);
+
+int kvm_protect_all_memory(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	unsigned long start, end;
+	int i, ret = 0;;
+
+	if (!IS_ENABLED(CONFIG_HAVE_KVM_PROTECTED_MEMORY))
+		return -KVM_ENOSYS;
+
+	mutex_lock(&kvm->slots_lock);
+	kvm->mem_protected = true;
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(memslot, slots) {
+			start = memslot->userspace_addr;
+			end = start + memslot->npages * PAGE_SIZE;
+			ret = __kvm_protect_memory(start, end, true);
+			if (ret)
+				goto out;
+		}
+	}
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_protect_all_memory);
+
 void kvm_sigset_activate(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu->sigset_active)
diff --git a/virt/lib/Makefile b/virt/lib/Makefile
index bd7f9a78bb6b..d6e50510801f 100644
--- a/virt/lib/Makefile
+++ b/virt/lib/Makefile
@@ -1,2 +1,3 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o
+obj-$(CONFIG_HAVE_KVM_PROTECTED_MEMORY) += mem_protected.o
diff --git a/virt/lib/mem_protected.c b/virt/lib/mem_protected.c
new file mode 100644
index 000000000000..0b01dd74f29c
--- /dev/null
+++ b/virt/lib/mem_protected.c
@@ -0,0 +1,71 @@ 
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/pagewalk.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/tlbflush.h>
+
+int __kvm_protect_memory(unsigned long start, unsigned long end, bool protect)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+	int ret;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	ret = -ENOMEM;
+	vma = find_vma(current->mm, start);
+	if (!vma)
+		goto out;
+
+	ret = -EINVAL;
+	if (vma->vm_start > start)
+		goto out;
+
+	if (start > vma->vm_start)
+		prev = vma;
+	else
+		prev = vma->vm_prev;
+
+	ret = 0;
+	while (true) {
+		unsigned long newflags, tmp;
+
+		tmp = vma->vm_end;
+		if (tmp > end)
+			tmp = end;
+
+		newflags = vma->vm_flags;
+		if (protect)
+			newflags |= VM_KVM_PROTECTED;
+		else
+			newflags &= ~VM_KVM_PROTECTED;
+
+		/* The VMA has been handled as part of other memslot */
+		if (newflags == vma->vm_flags)
+			goto next;
+
+		ret = mprotect_fixup(vma, &prev, start, tmp, newflags);
+		if (ret)
+			goto out;
+
+next:
+		start = tmp;
+		if (start < prev->vm_end)
+			start = prev->vm_end;
+
+		if (start >= end)
+			goto out;
+
+		vma = prev->vm_next;
+		if (!vma || vma->vm_start != start) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+out:
+	mmap_write_unlock(mm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__kvm_protect_memory);