
[RFC,v8,03/56] KVM: x86: Add platform hooks for private memory invalidations

Message ID 20230220183847.59159-4-michael.roth@amd.com (mailing list archive)
State New
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support

Commit Message

Michael Roth Feb. 20, 2023, 6:37 p.m. UTC
In some cases, like with SEV-SNP, guest memory needs to be updated in a
platform-specific manner before it can be safely freed back to the host.
Add hooks to wire up handling of this sort to the invalidation notifiers
for restricted memory.

Also issue invalidations of all allocated pages during notifier/memslot
unbinding so that the pages are not left in an unusable state when
they eventually get freed back to the host upon FD release.

Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  1 +
 arch/x86/kvm/mmu/mmu.c             |  5 +++++
 include/linux/kvm_host.h           |  3 +++
 mm/restrictedmem.c                 | 12 +++++++++++-
 virt/kvm/kvm_main.c                | 12 +++++++++++-
 6 files changed, 32 insertions(+), 2 deletions(-)
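
For illustration only (not part of this patch), a vendor module would advertise the new
optional op roughly as sketched below; sev_invalidate_restricted_mem is a hypothetical
name for the SNP-side implementation that lands later in the series:

	/* Hypothetical wiring in svm.c; existing SVM callbacks elided. */
	static void sev_invalidate_restricted_mem(struct kvm_memory_slot *slot,
						  gfn_t start, gfn_t end);

	static struct kvm_x86_ops svm_x86_ops __initdata = {
		/* ... existing callbacks ... */
		.invalidate_restricted_mem = sev_invalidate_restricted_mem,
	};

Because the op is declared KVM_X86_OP_OPTIONAL and dispatched via static_call_cond(),
vendors that need no platform-specific invalidation can simply leave it unset.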

Comments

Isaku Yamahata March 18, 2023, 5:13 a.m. UTC | #1
On Mon, Feb 20, 2023 at 12:37:54PM -0600,
Michael Roth <michael.roth@amd.com> wrote:

> In some cases, like with SEV-SNP, guest memory needs to be updated in a
> platform-specific manner before it can be safely freed back to the host.
> Add hooks to wire up handling of this sort to the invalidation notifiers
> for restricted memory.
> 
> Also issue invalidations of all allocated pages during notifier/memslot
> unbinding so that the pages are not left in an unusable state when
> they eventually get freed back to the host upon FD release.

I'm just curious, could you please elaborate?
Unbind happens only when a memory slot is deleted or the VM is destroyed.  In the
case of memory slot deletion, the GPA region is zapped via
kvm_arch_commit_memory_region().  In the case of VM destroy, we have
kvm_flush_shadow_all(), which calls
kvm_arch_flush_shadow_all() => kvm_mmu_zap_all().  Doesn't that work?

Thanks,
Michael Roth March 20, 2023, 6:09 p.m. UTC | #2
On Fri, Mar 17, 2023 at 10:13:22PM -0700, Isaku Yamahata wrote:
> On Mon, Feb 20, 2023 at 12:37:54PM -0600,
> Michael Roth <michael.roth@amd.com> wrote:
> 
> > In some cases, like with SEV-SNP, guest memory needs to be updated in a
> > platform-specific manner before it can be safely freed back to the host.
> > Add hooks to wire up handling of this sort to the invalidation notifiers
> > for restricted memory.
> > 
> > Also issue invalidations of all allocated pages during notifier/memslot
> > unbinding so that the pages are not left in an unusable state when
> > they eventually get freed back to the host upon FD release.
> 
> I'm just curious, could you please elaborate?
> Unbind happens only when a memory slot is deleted or the VM is destroyed.  In the
> case of memory slot deletion, the GPA region is zapped via
> kvm_arch_commit_memory_region().  In the case of VM destroy, we have
> kvm_flush_shadow_all(), which calls
> kvm_arch_flush_shadow_all() => kvm_mmu_zap_all().  Doesn't that work?

The main thing here is that unbind happens right before the restrictedmem
pages are released back to the host, and for SNP we need to clear the
associated RMP table entries to switch them from guest-owned to
hypervisor-owned. It doesn't necessarily need to be a separate callback,
but I'm not sure if it makes sense to squash that down into the various
MMU zapping helpers.

-Mike

> 
> Thanks,
> -- 
> Isaku Yamahata <isaku.yamahata@gmail.com>
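
For illustration, a minimal sketch of what an SNP-side implementation of the hook could
look like, assuming a restrictedmem pfn lookup along the lines of
kvm_restricted_mem_get_pfn() and a hypothetical rmp_make_host_owned() helper that flips
an RMP entry back to hypervisor-owned; the real implementation comes later in this
series:

	/*
	 * Illustrative sketch only: kvm_restricted_mem_get_pfn() and
	 * rmp_make_host_owned() are assumed/hypothetical helpers, not
	 * necessarily the names used elsewhere in this series.
	 */
	static void sev_invalidate_restricted_mem(struct kvm_memory_slot *slot,
						  gfn_t start, gfn_t end)
	{
		gfn_t gfn;

		for (gfn = start; gfn < end; gfn++) {
			kvm_pfn_t pfn;
			int order;

			/* Look up the backing pfn for this private gfn. */
			if (kvm_restricted_mem_get_pfn(slot, gfn, &pfn, &order))
				continue;

			/*
			 * Transition the RMP entry from guest-owned back to
			 * hypervisor-owned so the page is safe to free once
			 * the restrictedmem fd releases it.
			 */
			rmp_make_host_owned(pfn);

			put_page(pfn_to_page(pfn));
		}
	}

This is the point made above: zapping the NPT mappings alone does not reclaim the RMP
entries, which is why a separate callback is wired to the restrictedmem invalidation
notifiers.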

Patch

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index a8aaf532c2ab..6a885f024a00 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -133,6 +133,7 @@  KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
 KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
+KVM_X86_OP_OPTIONAL(invalidate_restricted_mem)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2da3fb2d5d1b..37c92412035f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1646,6 +1646,7 @@  struct kvm_x86_ops {
 	bool (*fault_is_private)(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault);
 	int (*update_mem_attr)(struct kvm_memory_slot *slot, unsigned int attr,
 			       gfn_t start, gfn_t end);
+	void (*invalidate_restricted_mem)(struct kvm_memory_slot *slot, gfn_t start, gfn_t end);
 
 	bool (*has_wbinvd_exit)(void);
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 053bd77bbf52..360af0c9997e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7264,4 +7264,9 @@  void kvm_arch_post_set_memory_attributes(struct kvm *kvm,
 		pr_warn_ratelimited("Failed to update GFN range 0x%llx-0x%llx with attributes 0x%lx. Ret: %d\n",
 				    start, end, attrs, ret);
 }
+
+void kvm_arch_invalidate_restricted_mem(struct kvm_memory_slot *slot, gfn_t start, gfn_t end)
+{
+	static_call_cond(kvm_x86_invalidate_restricted_mem)(slot, start, end);
+}
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d200b8f45583..4d542060cd93 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2341,6 +2341,9 @@  static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 	       kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
 
 }
+
+void kvm_arch_invalidate_restricted_mem(struct kvm_memory_slot *slot, gfn_t start, gfn_t end);
+
 #else
 static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 {
diff --git a/mm/restrictedmem.c b/mm/restrictedmem.c
index fd6f3c66033f..c8353c592cfe 100644
--- a/mm/restrictedmem.c
+++ b/mm/restrictedmem.c
@@ -17,7 +17,7 @@  struct restrictedmem {
 
 static int restrictedmem_release(struct inode *inode, struct file *file)
 {
-	struct restrictedmem *rm = inode->i_mapping->private_data;
+	struct restrictedmem *rm = file->f_mapping->private_data;
 
 	xa_destroy(&rm->bindings);
 	fput(rm->memfd);
@@ -305,10 +305,20 @@  void restrictedmem_unbind(struct file *file, pgoff_t start, pgoff_t end,
 			  struct restrictedmem_notifier *notifier)
 {
 	struct restrictedmem *rm = file->f_mapping->private_data;
+	unsigned long index;
 
+	pr_debug("%s: unregistering notifier, invalidating page offsets 0x%lx-0x%lx\n",
+		 __func__, start, end);
 	down_write(&rm->lock);
+
+	xa_for_each_range(&rm->bindings, index, notifier, start, end)
+		notifier->ops->invalidate_start(notifier, start, end);
+	xa_for_each_range(&rm->bindings, index, notifier, start, end)
+		notifier->ops->invalidate_end(notifier, start, end);
+
 	xa_store_range(&rm->bindings, start, end, NULL, GFP_KERNEL);
 	synchronize_rcu();
+
 	up_write(&rm->lock);
 }
 EXPORT_SYMBOL_GPL(restrictedmem_unbind);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8ec985f1c57d..f7e00593cc5d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -960,8 +960,15 @@  static void kvm_restrictedmem_invalidate_begin(struct restrictedmem_notifier *no
 	struct kvm *kvm = slot->kvm;
 	int idx;
 
-	if (restrictedmem_get_gfn_range(slot, start, end, &gfn_range))
+	if (restrictedmem_get_gfn_range(slot, start, end, &gfn_range)) {
+		pr_debug("%s: Invalidation skipped, slot: %d, start: 0x%lx, end: 0x%lx, restrictedmem.index: 0x%lx\n",
+			 __func__, slot->id, start, end, slot->restrictedmem.index);
 		return;
+	}
+
+	pr_debug("%s: slot: %d, start: 0x%lx, end: 0x%lx, restrictedmem.index: 0x%lx, gfn_start: 0x%llx, gfn_end: 0x%llx\n",
+		 __func__, slot->id, start, end, slot->restrictedmem.index, gfn_range.start,
+		 gfn_range.end);
 
 	idx = srcu_read_lock(&kvm->srcu);
 	KVM_MMU_LOCK(kvm);
@@ -972,7 +979,10 @@  static void kvm_restrictedmem_invalidate_begin(struct restrictedmem_notifier *no
 		kvm_flush_remote_tlbs(kvm);
 
 	KVM_MMU_UNLOCK(kvm);
+
 	srcu_read_unlock(&kvm->srcu, idx);
+
+	kvm_arch_invalidate_restricted_mem(slot, gfn_range.start, gfn_range.end);
 }
 
 static void kvm_restrictedmem_invalidate_end(struct restrictedmem_notifier *notifier,