[RFC,v2,03/10] KVM: Implement kvm_(read|write)_guest_page for private memory slots

Message ID 20240801090117.3841080-4-tabba@google.com (mailing list archive)
State New, archived
Series KVM: Restricted mapping of guest_memfd at the host and pKVM/arm64 support

Commit Message

Fuad Tabba Aug. 1, 2024, 9:01 a.m. UTC
Make __kvm_read_guest_page/__kvm_write_guest_page capable of
accessing guest memory if no userspace address is available.
Moreover, check that the memory being accessed is shared with the
host before attempting the access.

KVM at the host might need to access shared memory that is not
mapped in the host userspace but is in fact shared with the host,
e.g., when accounting for stolen time. This allows the access
without relying on the slot's userspace_addr being set.

This does not circumvent protection, since the access is only
attempted if the memory is mappable by the host, which implies
shareability.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 virt/kvm/kvm_main.c | 127 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 111 insertions(+), 16 deletions(-)
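
For context, the stolen-time case the changelog mentions reaches this code through the generic kvm_write_guest() path. A minimal sketch of such a caller is below; the record layout and the helper name are illustrative assumptions, not code from this series (only the existing kvm_write_guest() API from <linux/kvm_host.h> is assumed):

/*
 * Illustrative only: a host-side consumer, loosely modelled on
 * stolen-time accounting, that writes into guest memory through
 * kvm_write_guest(). The struct layout and function name are
 * assumptions for this sketch, not part of the series.
 */
struct demo_pvtime_record {
	__le64 steal_time;		/* accumulated steal time, in ns */
};

static int demo_update_steal_time(struct kvm *kvm, gpa_t base, u64 steal_ns)
{
	struct demo_pvtime_record rec = {
		.steal_time = cpu_to_le64(steal_ns),
	};

	/*
	 * kvm_write_guest() resolves the memslot for @base and lands in
	 * __kvm_write_guest_page(). With this patch, a guest_memfd-backed
	 * slot that is currently mappable (i.e. shared with the host) is
	 * written through its pfn even when slot->userspace_addr is unset.
	 */
	return kvm_write_guest(kvm, base, &rec, sizeof(rec));
}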

Comments

Sean Christopherson Aug. 16, 2024, 7:32 p.m. UTC | #1
On Thu, Aug 01, 2024, Fuad Tabba wrote:
> Make __kvm_read_guest_page/__kvm_write_guest_page capable of
> accessing guest memory if no userspace address is available.
> Moreover, check that the memory being accessed is shared with the
> host before attempting the access.
> 
> KVM at the host might need to access shared memory that is not
> mapped in the host userspace but is in fact shared with the host,
> e.g., when accounting for stolen time. This allows the access
> without relying on the slot's userspace_addr being set.

Why?  As evidenced by the amount of code below, special casing guest_memfd isn't
trivial, and taking kvm->slots_lock is likely a complete non-starter.  In the
happy case, uaccess is about as fast as can be, and has no inherent scaling issues.

> This does not circumvent protection, since the access is only
> attempted if the memory is mappable by the host, which implies
> shareability.
> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  virt/kvm/kvm_main.c | 127 ++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 111 insertions(+), 16 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index f4b4498d4de6..ec6255c7325e 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3385,20 +3385,108 @@ int kvm_gmem_clear_mappable(struct kvm *kvm, gfn_t start, gfn_t end)
>  	return kvm_gmem_toggle_mappable(kvm, start, end, false);
>  }
>  
> +static int __kvm_read_private_guest_page(struct kvm *kvm,

The changelog says this is for accessing memory that is shared, but this says
"private".

> +					 struct kvm_memory_slot *slot,
> +					 gfn_t gfn, void *data, int offset,
> +					 int len)
Fuad Tabba Sept. 3, 2024, 9:28 a.m. UTC | #2
Hi Sean,

On Fri, 16 Aug 2024 at 20:32, Sean Christopherson <seanjc@google.com> wrote:
>
> On Thu, Aug 01, 2024, Fuad Tabba wrote:
> > Make __kvm_read_guest_page/__kvm_write_guest_page capable of
> > accessing guest memory if no userspace address is available.
> > Moreover, check that the memory being accessed is shared with the
> > host before attempting the access.
> >
> > KVM at the host might need to access shared memory that is not
> > mapped in the host userspace but is in fact shared with the host,
> > e.g., when accounting for stolen time. This allows the access
> > without relying on the slot's userspace_addr being set.
>
> Why?  As evidenced by the amount of code below, special casing guest_memfd isn't
> trivial, and taking kvm->slots_lock is likely a complete non-starter.  In the
> happy case, uaccess is about as fast as can be, and has no inherent scaling issues.
>
> > This does not circumvent protection, since the access is only
> > attempted if the memory is mappable by the host, which implies
> > shareability.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  virt/kvm/kvm_main.c | 127 ++++++++++++++++++++++++++++++++++++++------
> >  1 file changed, 111 insertions(+), 16 deletions(-)
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index f4b4498d4de6..ec6255c7325e 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -3385,20 +3385,108 @@ int kvm_gmem_clear_mappable(struct kvm *kvm, gfn_t start, gfn_t end)
> >       return kvm_gmem_toggle_mappable(kvm, start, end, false);
> >  }
> >
> > +static int __kvm_read_private_guest_page(struct kvm *kvm,
>
> The changelog says this is for accessing memory that is shared, but this says
> "private".

This is bad naming on my part. Instead, I should call this function
something like read_guestmem_page (and similar for the write one).
Thanks for pointing this out.

Cheers,
/fuad

> > +                                      struct kvm_memory_slot *slot,
> > +                                      gfn_t gfn, void *data, int offset,
> > +                                      int len)
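
Following up on the naming point in Fuad's reply, the suggested rename would presumably leave the two helpers with prototypes along these lines (a naming sketch only, not code from the series):

static int __kvm_read_guestmem_page(struct kvm *kvm,
				    struct kvm_memory_slot *slot,
				    gfn_t gfn, void *data, int offset,
				    int len);

static int __kvm_write_guestmem_page(struct kvm *kvm,
				     struct kvm_memory_slot *slot,
				     gfn_t gfn, const void *data,
				     int offset, int len);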

Patch

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f4b4498d4de6..ec6255c7325e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3385,20 +3385,108 @@  int kvm_gmem_clear_mappable(struct kvm *kvm, gfn_t start, gfn_t end)
 	return kvm_gmem_toggle_mappable(kvm, start, end, false);
 }
 
+static int __kvm_read_private_guest_page(struct kvm *kvm,
+					 struct kvm_memory_slot *slot,
+					 gfn_t gfn, void *data, int offset,
+					 int len)
+{
+	struct page *page;
+	u64 pfn;
+	int r = 0;
+
+	if (size_add(offset, len) > PAGE_SIZE)
+		return -E2BIG;
+
+	mutex_lock(&kvm->slots_lock);
+
+	if (!__kvm_gmem_is_mappable(kvm, gfn, gfn + 1)) {
+		r = -EPERM;
+		goto unlock;
+	}
+
+	r = kvm_gmem_get_pfn_locked(kvm, slot, gfn, &pfn, NULL);
+	if (r)
+		goto unlock;
+
+	page = pfn_to_page(pfn);
+	memcpy(data, page_address(page) + offset, len);
+	unlock_page(page);
+	kvm_release_pfn_clean(pfn);
+unlock:
+	mutex_unlock(&kvm->slots_lock);
+
+	return r;
+}
+
+static int __kvm_write_private_guest_page(struct kvm *kvm,
+					  struct kvm_memory_slot *slot,
+					  gfn_t gfn, const void *data,
+					  int offset, int len)
+{
+	struct page *page;
+	u64 pfn;
+	int r = 0;
+
+	if (size_add(offset, len) > PAGE_SIZE)
+		return -E2BIG;
+
+	mutex_lock(&kvm->slots_lock);
+
+	if (!__kvm_gmem_is_mappable(kvm, gfn, gfn + 1)) {
+		r = -EPERM;
+		goto unlock;
+	}
+
+	r = kvm_gmem_get_pfn_locked(kvm, slot, gfn, &pfn, NULL);
+	if (r)
+		goto unlock;
+
+	page = pfn_to_page(pfn);
+	memcpy(page_address(page) + offset, data, len);
+	unlock_page(page);
+	kvm_release_pfn_dirty(pfn);
+unlock:
+	mutex_unlock(&kvm->slots_lock);
+
+	return r;
+}
+#else
+static int __kvm_read_private_guest_page(struct kvm *kvm,
+					 struct kvm_memory_slot *slot,
+					 gfn_t gfn, void *data, int offset,
+					 int len)
+{
+	WARN_ON_ONCE(1);
+	return -EIO;
+}
+
+static int __kvm_write_private_guest_page(struct kvm *kvm,
+					  struct kvm_memory_slot *slot,
+					  gfn_t gfn, const void *data,
+					  int offset, int len)
+{
+	WARN_ON_ONCE(1);
+	return -EIO;
+}
 #endif /* CONFIG_KVM_PRIVATE_MEM_MAPPABLE */
 
 /* Copy @len bytes from guest memory at '(@gfn * PAGE_SIZE) + @offset' to @data */
-static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
-				 void *data, int offset, int len)
+
+static int __kvm_read_guest_page(struct kvm *kvm, struct kvm_memory_slot *slot,
+				 gfn_t gfn, void *data, int offset, int len)
 {
-	int r;
 	unsigned long addr;
 
+	if (IS_ENABLED(CONFIG_KVM_PRIVATE_MEM_MAPPABLE) &&
+	    kvm_slot_can_be_private(slot)) {
+		return __kvm_read_private_guest_page(kvm, slot, gfn, data,
+						     offset, len);
+	}
+
 	addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	r = __copy_from_user(data, (void __user *)addr + offset, len);
-	if (r)
+	if (__copy_from_user(data, (void __user *)addr + offset, len))
 		return -EFAULT;
 	return 0;
 }
@@ -3408,7 +3496,7 @@  int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_read_guest_page(slot, gfn, data, offset, len);
+	return __kvm_read_guest_page(kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_page);
 
@@ -3417,7 +3505,7 @@  int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_read_guest_page(slot, gfn, data, offset, len);
+	return __kvm_read_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
 
@@ -3492,17 +3580,24 @@  EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 /* Copy @len bytes from @data into guest memory at '(@gfn * PAGE_SIZE) + @offset' */
 static int __kvm_write_guest_page(struct kvm *kvm,
 				  struct kvm_memory_slot *memslot, gfn_t gfn,
-			          const void *data, int offset, int len)
+				  const void *data, int offset, int len)
 {
-	int r;
-	unsigned long addr;
+	if (IS_ENABLED(CONFIG_KVM_PRIVATE_MEM_MAPPABLE) &&
+	    kvm_slot_can_be_private(memslot)) {
+		int r = __kvm_write_private_guest_page(kvm, memslot, gfn, data,
+						       offset, len);
+
+		if (r)
+			return r;
+	} else {
+		unsigned long addr = gfn_to_hva_memslot(memslot, gfn);
+
+		if (kvm_is_error_hva(addr))
+			return -EFAULT;
+		if (__copy_to_user((void __user *)addr + offset, data, len))
+			return -EFAULT;
+	}
 
-	addr = gfn_to_hva_memslot(memslot, gfn);
-	if (kvm_is_error_hva(addr))
-		return -EFAULT;
-	r = __copy_to_user((void __user *)addr + offset, data, len);
-	if (r)
-		return -EFAULT;
 	mark_page_dirty_in_slot(kvm, memslot, gfn);
 	return 0;
 }