diff mbox series

[RFC,06/16] KVM: Use GUP instead of copy_from/to_user() to access guest memory

Message ID 20200522125214.31348-7-kirill.shutemov@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series KVM protected memory extension | expand

Commit Message

Kirill A . Shutemov May 22, 2020, 12:52 p.m. UTC
New helpers copy_from_guest()/copy_to_guest() to be used if KVM memory
protection feature is enabled.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/kvm_host.h |  4 +++
 virt/kvm/kvm_main.c      | 78 ++++++++++++++++++++++++++++++++++------
 2 files changed, 72 insertions(+), 10 deletions(-)

Comments

Vitaly Kuznetsov May 25, 2020, 3:08 p.m. UTC | #1
"Kirill A. Shutemov" <kirill@shutemov.name> writes:

> New helpers copy_from_guest()/copy_to_guest() to be used if KVM memory
> protection feature is enabled.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  include/linux/kvm_host.h |  4 +++
>  virt/kvm/kvm_main.c      | 78 ++++++++++++++++++++++++++++++++++------
>  2 files changed, 72 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 131cc1527d68..bd0bb600f610 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -503,6 +503,7 @@ struct kvm {
>  	struct srcu_struct srcu;
>  	struct srcu_struct irq_srcu;
>  	pid_t userspace_pid;
> +	bool mem_protected;
>  };
>  
>  #define kvm_err(fmt, ...) \
> @@ -727,6 +728,9 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn);
>  void kvm_set_pfn_accessed(kvm_pfn_t pfn);
>  void kvm_get_pfn(kvm_pfn_t pfn);
>  
> +int copy_from_guest(void *data, unsigned long hva, int len);
> +int copy_to_guest(unsigned long hva, const void *data, int len);
> +
>  void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
>  int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
>  			int len);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 731c1e517716..033471f71dae 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2248,8 +2248,48 @@ static int next_segment(unsigned long len, int offset)
>  		return len;
>  }
>  
> +int copy_from_guest(void *data, unsigned long hva, int len)
> +{
> +	int offset = offset_in_page(hva);
> +	struct page *page;
> +	int npages, seg;
> +
> +	while ((seg = next_segment(len, offset)) != 0) {
> +		npages = get_user_pages_unlocked(hva, 1, &page, 0);
> +		if (npages != 1)
> +			return -EFAULT;
> +		memcpy(data, page_address(page) + offset, seg);
> +		put_page(page);
> +		len -= seg;
> +		hva += seg;
> +		offset = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +int copy_to_guest(unsigned long hva, const void *data, int len)
> +{
> +	int offset = offset_in_page(hva);
> +	struct page *page;
> +	int npages, seg;
> +
> +	while ((seg = next_segment(len, offset)) != 0) {
> +		npages = get_user_pages_unlocked(hva, 1, &page, FOLL_WRITE);
> +		if (npages != 1)
> +			return -EFAULT;
> +		memcpy(page_address(page) + offset, data, seg);
> +		put_page(page);
> +		len -= seg;
> +		hva += seg;
> +		offset = 0;
> +	}
> +	return 0;
> +}
> +
>  static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
> -				 void *data, int offset, int len)
> +				 void *data, int offset, int len,
> +				 bool protected)
>  {
>  	int r;
>  	unsigned long addr;
> @@ -2257,7 +2297,10 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
>  	addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
>  	if (kvm_is_error_hva(addr))
>  		return -EFAULT;
> -	r = __copy_from_user(data, (void __user *)addr + offset, len);
> +	if (protected)
> +		r = copy_from_guest(data, addr + offset, len);
> +	else
> +		r = __copy_from_user(data, (void __user *)addr + offset, len);
>  	if (r)
>  		return -EFAULT;
>  	return 0;
> @@ -2268,7 +2311,8 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
>  {
>  	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
>  
> -	return __kvm_read_guest_page(slot, gfn, data, offset, len);
> +	return __kvm_read_guest_page(slot, gfn, data, offset, len,
> +				     kvm->mem_protected);
>  }
>  EXPORT_SYMBOL_GPL(kvm_read_guest_page);
>  
> @@ -2277,7 +2321,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
>  {
>  	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
>  
> -	return __kvm_read_guest_page(slot, gfn, data, offset, len);
> +	return __kvm_read_guest_page(slot, gfn, data, offset, len,
> +				     vcpu->kvm->mem_protected);

Personally, I would've just added 'struct kvm' pointer to 'struct
kvm_memory_slot' to be able to extract 'mem_protected' info when
needed. This will make the patch much smaller.

>  }
>  EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
>  
> @@ -2350,7 +2395,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
>  EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
>  
>  static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
> -			          const void *data, int offset, int len)
> +			          const void *data, int offset, int len,
> +				  bool protected)
>  {
>  	int r;
>  	unsigned long addr;
> @@ -2358,7 +2404,11 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
>  	addr = gfn_to_hva_memslot(memslot, gfn);
>  	if (kvm_is_error_hva(addr))
>  		return -EFAULT;
> -	r = __copy_to_user((void __user *)addr + offset, data, len);
> +
> +	if (protected)
> +		r = copy_to_guest(addr + offset, data, len);
> +	else
> +		r = __copy_to_user((void __user *)addr + offset, data, len);

All users of copy_to_guest() will have to have the same 'if (protected)'
check, right? Why not move the check to copy_to/from_guest() then?

>  	if (r)
>  		return -EFAULT;
>  	mark_page_dirty_in_slot(memslot, gfn);
> @@ -2370,7 +2420,8 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
>  {
>  	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
>  
> -	return __kvm_write_guest_page(slot, gfn, data, offset, len);
> +	return __kvm_write_guest_page(slot, gfn, data, offset, len,
> +				      kvm->mem_protected);
>  }
>  EXPORT_SYMBOL_GPL(kvm_write_guest_page);
>  
> @@ -2379,7 +2430,8 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
>  {
>  	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
>  
> -	return __kvm_write_guest_page(slot, gfn, data, offset, len);
> +	return __kvm_write_guest_page(slot, gfn, data, offset, len,
> +				      vcpu->kvm->mem_protected);
>  }
>  EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
>  
> @@ -2495,7 +2547,10 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
>  	if (unlikely(!ghc->memslot))
>  		return kvm_write_guest(kvm, gpa, data, len);
>  
> -	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
> +	if (kvm->mem_protected)
> +		r = copy_to_guest(ghc->hva + offset, data, len);
> +	else
> +		r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
>  	if (r)
>  		return -EFAULT;
>  	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
> @@ -2530,7 +2585,10 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
>  	if (unlikely(!ghc->memslot))
>  		return kvm_read_guest(kvm, ghc->gpa, data, len);
>  
> -	r = __copy_from_user(data, (void __user *)ghc->hva, len);
> +	if (kvm->mem_protected)
> +		r = copy_from_guest(data, ghc->hva, len);
> +	else
> +		r = __copy_from_user(data, (void __user *)ghc->hva, len);
>  	if (r)
>  		return -EFAULT;
Kirill A . Shutemov May 25, 2020, 3:17 p.m. UTC | #2
On Mon, May 25, 2020 at 05:08:43PM +0200, Vitaly Kuznetsov wrote:
> "Kirill A. Shutemov" <kirill@shutemov.name> writes:
> 
> > New helpers copy_from_guest()/copy_to_guest() to be used if KVM memory
> > protection feature is enabled.
> >
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  include/linux/kvm_host.h |  4 +++
> >  virt/kvm/kvm_main.c      | 78 ++++++++++++++++++++++++++++++++++------
> >  2 files changed, 72 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 131cc1527d68..bd0bb600f610 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -503,6 +503,7 @@ struct kvm {
> >  	struct srcu_struct srcu;
> >  	struct srcu_struct irq_srcu;
> >  	pid_t userspace_pid;
> > +	bool mem_protected;
> >  };
> >  
> >  #define kvm_err(fmt, ...) \
> > @@ -727,6 +728,9 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn);
> >  void kvm_set_pfn_accessed(kvm_pfn_t pfn);
> >  void kvm_get_pfn(kvm_pfn_t pfn);
> >  
> > +int copy_from_guest(void *data, unsigned long hva, int len);
> > +int copy_to_guest(unsigned long hva, const void *data, int len);
> > +
> >  void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
> >  int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
> >  			int len);
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 731c1e517716..033471f71dae 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -2248,8 +2248,48 @@ static int next_segment(unsigned long len, int offset)
> >  		return len;
> >  }
> >  
> > +int copy_from_guest(void *data, unsigned long hva, int len)
> > +{
> > +	int offset = offset_in_page(hva);
> > +	struct page *page;
> > +	int npages, seg;
> > +
> > +	while ((seg = next_segment(len, offset)) != 0) {
> > +		npages = get_user_pages_unlocked(hva, 1, &page, 0);
> > +		if (npages != 1)
> > +			return -EFAULT;
> > +		memcpy(data, page_address(page) + offset, seg);
> > +		put_page(page);
> > +		len -= seg;
> > +		hva += seg;
> > +		offset = 0;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +int copy_to_guest(unsigned long hva, const void *data, int len)
> > +{
> > +	int offset = offset_in_page(hva);
> > +	struct page *page;
> > +	int npages, seg;
> > +
> > +	while ((seg = next_segment(len, offset)) != 0) {
> > +		npages = get_user_pages_unlocked(hva, 1, &page, FOLL_WRITE);
> > +		if (npages != 1)
> > +			return -EFAULT;
> > +		memcpy(page_address(page) + offset, data, seg);
> > +		put_page(page);
> > +		len -= seg;
> > +		hva += seg;
> > +		offset = 0;
> > +	}
> > +	return 0;
> > +}
> > +
> >  static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
> > -				 void *data, int offset, int len)
> > +				 void *data, int offset, int len,
> > +				 bool protected)
> >  {
> >  	int r;
> >  	unsigned long addr;
> > @@ -2257,7 +2297,10 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
> >  	addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
> >  	if (kvm_is_error_hva(addr))
> >  		return -EFAULT;
> > -	r = __copy_from_user(data, (void __user *)addr + offset, len);
> > +	if (protected)
> > +		r = copy_from_guest(data, addr + offset, len);
> > +	else
> > +		r = __copy_from_user(data, (void __user *)addr + offset, len);
> >  	if (r)
> >  		return -EFAULT;
> >  	return 0;
> > @@ -2268,7 +2311,8 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
> >  {
> >  	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
> >  
> > -	return __kvm_read_guest_page(slot, gfn, data, offset, len);
> > +	return __kvm_read_guest_page(slot, gfn, data, offset, len,
> > +				     kvm->mem_protected);
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_read_guest_page);
> >  
> > @@ -2277,7 +2321,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
> >  {
> >  	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
> >  
> > -	return __kvm_read_guest_page(slot, gfn, data, offset, len);
> > +	return __kvm_read_guest_page(slot, gfn, data, offset, len,
> > +				     vcpu->kvm->mem_protected);
> 
> Personally, I would've just added 'struct kvm' pointer to 'struct
> kvm_memory_slot' to be able to extract 'mem_protected' info when
> needed. This will make the patch much smaller.

Okay, can do.

Other thing I tried is to have per-slot flag to indicate that it's
protected. But Sean pointed that it's all-or-nothing feature and having
the flag in the slot would be misleading.

> >  }
> >  EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
> >  
> > @@ -2350,7 +2395,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
> >  EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
> >  
> >  static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
> > -			          const void *data, int offset, int len)
> > +			          const void *data, int offset, int len,
> > +				  bool protected)
> >  {
> >  	int r;
> >  	unsigned long addr;
> > @@ -2358,7 +2404,11 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
> >  	addr = gfn_to_hva_memslot(memslot, gfn);
> >  	if (kvm_is_error_hva(addr))
> >  		return -EFAULT;
> > -	r = __copy_to_user((void __user *)addr + offset, data, len);
> > +
> > +	if (protected)
> > +		r = copy_to_guest(addr + offset, data, len);
> > +	else
> > +		r = __copy_to_user((void __user *)addr + offset, data, len);
> 
> All users of copy_to_guest() will have to have the same 'if (protected)'
> check, right? Why not move the check to copy_to/from_guest() then?

Good point.

> >  	if (r)
> >  		return -EFAULT;
> >  	mark_page_dirty_in_slot(memslot, gfn);
> > @@ -2370,7 +2420,8 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
> >  {
> >  	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
> >  
> > -	return __kvm_write_guest_page(slot, gfn, data, offset, len);
> > +	return __kvm_write_guest_page(slot, gfn, data, offset, len,
> > +				      kvm->mem_protected);
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_write_guest_page);
> >  
> > @@ -2379,7 +2430,8 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
> >  {
> >  	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
> >  
> > -	return __kvm_write_guest_page(slot, gfn, data, offset, len);
> > +	return __kvm_write_guest_page(slot, gfn, data, offset, len,
> > +				      vcpu->kvm->mem_protected);
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
> >  
> > @@ -2495,7 +2547,10 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
> >  	if (unlikely(!ghc->memslot))
> >  		return kvm_write_guest(kvm, gpa, data, len);
> >  
> > -	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
> > +	if (kvm->mem_protected)
> > +		r = copy_to_guest(ghc->hva + offset, data, len);
> > +	else
> > +		r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
> >  	if (r)
> >  		return -EFAULT;
> >  	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
> > @@ -2530,7 +2585,10 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
> >  	if (unlikely(!ghc->memslot))
> >  		return kvm_read_guest(kvm, ghc->gpa, data, len);
> >  
> > -	r = __copy_from_user(data, (void __user *)ghc->hva, len);
> > +	if (kvm->mem_protected)
> > +		r = copy_from_guest(data, ghc->hva, len);
> > +	else
> > +		r = __copy_from_user(data, (void __user *)ghc->hva, len);
> >  	if (r)
> >  		return -EFAULT;
> 
> -- 
> Vitaly
> 
>
Mike Rapoport May 26, 2020, 6:14 a.m. UTC | #3
On Fri, May 22, 2020 at 03:52:04PM +0300, Kirill A. Shutemov wrote:
> New helpers copy_from_guest()/copy_to_guest() to be used if KVM memory
> protection feature is enabled.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  include/linux/kvm_host.h |  4 +++
>  virt/kvm/kvm_main.c      | 78 ++++++++++++++++++++++++++++++++++------
>  2 files changed, 72 insertions(+), 10 deletions(-)
> 
>  static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
> -				 void *data, int offset, int len)
> +				 void *data, int offset, int len,
> +				 bool protected)
>  {
>  	int r;
>  	unsigned long addr;
> @@ -2257,7 +2297,10 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
>  	addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
>  	if (kvm_is_error_hva(addr))
>  		return -EFAULT;
> -	r = __copy_from_user(data, (void __user *)addr + offset, len);
> +	if (protected)
> +		r = copy_from_guest(data, addr + offset, len);
> +	else
> +		r = __copy_from_user(data, (void __user *)addr + offset, len);

Maybe always use copy_{from,to}_guest() and move the 'if (protected)'
there?
If kvm is added to memory slot, it cab be the passed to copy_{to,from}_guest.

>  	if (r)
>  		return -EFAULT;
>  	return 0;
> @@ -2268,7 +2311,8 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
>  {
>  	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
>  
> -	return __kvm_read_guest_page(slot, gfn, data, offset, len);
> +	return __kvm_read_guest_page(slot, gfn, data, offset, len,
> +				     kvm->mem_protected);
>  }
>  EXPORT_SYMBOL_GPL(kvm_read_guest_page);
>  
> @@ -2277,7 +2321,8 @@ int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
>  {
>  	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
>  
> -	return __kvm_read_guest_page(slot, gfn, data, offset, len);
> +	return __kvm_read_guest_page(slot, gfn, data, offset, len,
> +				     vcpu->kvm->mem_protected);
>  }
>  EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
>
Kirill A . Shutemov May 26, 2020, 9:56 p.m. UTC | #4
On Tue, May 26, 2020 at 09:14:59AM +0300, Mike Rapoport wrote:
> On Fri, May 22, 2020 at 03:52:04PM +0300, Kirill A. Shutemov wrote:
> > New helpers copy_from_guest()/copy_to_guest() to be used if KVM memory
> > protection feature is enabled.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  include/linux/kvm_host.h |  4 +++
> >  virt/kvm/kvm_main.c      | 78 ++++++++++++++++++++++++++++++++++------
> >  2 files changed, 72 insertions(+), 10 deletions(-)
> > 
> >  static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
> > -				 void *data, int offset, int len)
> > +				 void *data, int offset, int len,
> > +				 bool protected)
> >  {
> >  	int r;
> >  	unsigned long addr;
> > @@ -2257,7 +2297,10 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
> >  	addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
> >  	if (kvm_is_error_hva(addr))
> >  		return -EFAULT;
> > -	r = __copy_from_user(data, (void __user *)addr + offset, len);
> > +	if (protected)
> > +		r = copy_from_guest(data, addr + offset, len);
> > +	else
> > +		r = __copy_from_user(data, (void __user *)addr + offset, len);
> 
> Maybe always use copy_{from,to}_guest() and move the 'if (protected)'
> there?
> If kvm is added to memory slot, it cab be the passed to copy_{to,from}_guest.

Right, Vitaly has pointed me to this already.
Kees Cook May 29, 2020, 3:24 p.m. UTC | #5
On Fri, May 22, 2020 at 03:52:04PM +0300, Kirill A. Shutemov wrote:
> +int copy_from_guest(void *data, unsigned long hva, int len)
> +{
> +	int offset = offset_in_page(hva);
> +	struct page *page;
> +	int npages, seg;
> +
> +	while ((seg = next_segment(len, offset)) != 0) {
> +		npages = get_user_pages_unlocked(hva, 1, &page, 0);
> +		if (npages != 1)
> +			return -EFAULT;
> +		memcpy(data, page_address(page) + offset, seg);
> +		put_page(page);
> +		len -= seg;
> +		hva += seg;
> +		offset = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +int copy_to_guest(unsigned long hva, const void *data, int len)
> +{
> +	int offset = offset_in_page(hva);
> +	struct page *page;
> +	int npages, seg;
> +
> +	while ((seg = next_segment(len, offset)) != 0) {
> +		npages = get_user_pages_unlocked(hva, 1, &page, FOLL_WRITE);
> +		if (npages != 1)
> +			return -EFAULT;
> +		memcpy(page_address(page) + offset, data, seg);
> +		put_page(page);
> +		len -= seg;
> +		hva += seg;
> +		offset = 0;
> +	}
> +	return 0;
> +}
> +
>  static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
> -				 void *data, int offset, int len)
> +				 void *data, int offset, int len,
> +				 bool protected)
>  {
>  	int r;
>  	unsigned long addr;
> @@ -2257,7 +2297,10 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
>  	addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
>  	if (kvm_is_error_hva(addr))
>  		return -EFAULT;
> -	r = __copy_from_user(data, (void __user *)addr + offset, len);
> +	if (protected)
> +		r = copy_from_guest(data, addr + offset, len);
> +	else
> +		r = __copy_from_user(data, (void __user *)addr + offset, len);
>  	if (r)
>  		return -EFAULT;
>  	return 0;

This ends up removing KASAN and object size tests. Compare to:

__copy_from_user(void *to, const void __user *from, unsigned long n)
{
        might_fault();
        kasan_check_write(to, n);
        check_object_size(to, n, false);
        return raw_copy_from_user(to, from, n);
}

Those will need to get added back. :)

Additionally, I see that copy_from_guest() neither clears the
destination memory on a short read, nor does KVM actually handle the
short read case correctly now. See the notes in uaccess.h:

 * NOTE: only copy_from_user() zero-pads the destination in case of short copy.
 * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything
 * at all; their callers absolutely must check the return value.

It's not clear to me how the destination buffers get reused, but the has
the potential to leak kernel memory contents. This needs separate
fixing, I think.

-Kees
Paolo Bonzini June 1, 2020, 4:35 p.m. UTC | #6
On 25/05/20 17:17, Kirill A. Shutemov wrote:
>> Personally, I would've just added 'struct kvm' pointer to 'struct
>> kvm_memory_slot' to be able to extract 'mem_protected' info when
>> needed. This will make the patch much smaller.
> Okay, can do.
> 
> Other thing I tried is to have per-slot flag to indicate that it's
> protected. But Sean pointed that it's all-or-nothing feature and having
> the flag in the slot would be misleading.
> 

Perhaps it would be misleading, but it's an optimization.  Saving a
pointer dereference can be worth it, also because there are some places
where we just pass around a memslot and we don't have the struct kvm*.

Also, it's an all-or-nothing feature _now_.  It doesn't have to remain
that way.

Paolo
Kirill A . Shutemov June 2, 2020, 1:33 p.m. UTC | #7
On Mon, Jun 01, 2020 at 06:35:22PM +0200, Paolo Bonzini wrote:
> On 25/05/20 17:17, Kirill A. Shutemov wrote:
> >> Personally, I would've just added 'struct kvm' pointer to 'struct
> >> kvm_memory_slot' to be able to extract 'mem_protected' info when
> >> needed. This will make the patch much smaller.
> > Okay, can do.
> > 
> > Other thing I tried is to have per-slot flag to indicate that it's
> > protected. But Sean pointed that it's all-or-nothing feature and having
> > the flag in the slot would be misleading.
> > 
> 
> Perhaps it would be misleading, but it's an optimization.  Saving a
> pointer dereference can be worth it, also because there are some places
> where we just pass around a memslot and we don't have the struct kvm*.

Vitaly proposed to add struct kvm pointer into memslot. Do you object
against it?
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 131cc1527d68..bd0bb600f610 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -503,6 +503,7 @@  struct kvm {
 	struct srcu_struct srcu;
 	struct srcu_struct irq_srcu;
 	pid_t userspace_pid;
+	bool mem_protected;
 };
 
 #define kvm_err(fmt, ...) \
@@ -727,6 +728,9 @@  void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 void kvm_get_pfn(kvm_pfn_t pfn);
 
+int copy_from_guest(void *data, unsigned long hva, int len);
+int copy_to_guest(unsigned long hva, const void *data, int len);
+
 void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 731c1e517716..033471f71dae 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2248,8 +2248,48 @@  static int next_segment(unsigned long len, int offset)
 		return len;
 }
 
+int copy_from_guest(void *data, unsigned long hva, int len)
+{
+	int offset = offset_in_page(hva);
+	struct page *page;
+	int npages, seg;
+
+	while ((seg = next_segment(len, offset)) != 0) {
+		npages = get_user_pages_unlocked(hva, 1, &page, 0);
+		if (npages != 1)
+			return -EFAULT;
+		memcpy(data, page_address(page) + offset, seg);
+		put_page(page);
+		len -= seg;
+		hva += seg;
+		offset = 0;
+	}
+
+	return 0;
+}
+
+int copy_to_guest(unsigned long hva, const void *data, int len)
+{
+	int offset = offset_in_page(hva);
+	struct page *page;
+	int npages, seg;
+
+	while ((seg = next_segment(len, offset)) != 0) {
+		npages = get_user_pages_unlocked(hva, 1, &page, FOLL_WRITE);
+		if (npages != 1)
+			return -EFAULT;
+		memcpy(page_address(page) + offset, data, seg);
+		put_page(page);
+		len -= seg;
+		hva += seg;
+		offset = 0;
+	}
+	return 0;
+}
+
 static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
-				 void *data, int offset, int len)
+				 void *data, int offset, int len,
+				 bool protected)
 {
 	int r;
 	unsigned long addr;
@@ -2257,7 +2297,10 @@  static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
 	addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	r = __copy_from_user(data, (void __user *)addr + offset, len);
+	if (protected)
+		r = copy_from_guest(data, addr + offset, len);
+	else
+		r = __copy_from_user(data, (void __user *)addr + offset, len);
 	if (r)
 		return -EFAULT;
 	return 0;
@@ -2268,7 +2311,8 @@  int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_read_guest_page(slot, gfn, data, offset, len);
+	return __kvm_read_guest_page(slot, gfn, data, offset, len,
+				     kvm->mem_protected);
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_page);
 
@@ -2277,7 +2321,8 @@  int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_read_guest_page(slot, gfn, data, offset, len);
+	return __kvm_read_guest_page(slot, gfn, data, offset, len,
+				     vcpu->kvm->mem_protected);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
 
@@ -2350,7 +2395,8 @@  int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
 static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
-			          const void *data, int offset, int len)
+			          const void *data, int offset, int len,
+				  bool protected)
 {
 	int r;
 	unsigned long addr;
@@ -2358,7 +2404,11 @@  static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
 	addr = gfn_to_hva_memslot(memslot, gfn);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	r = __copy_to_user((void __user *)addr + offset, data, len);
+
+	if (protected)
+		r = copy_to_guest(addr + offset, data, len);
+	else
+		r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
 	mark_page_dirty_in_slot(memslot, gfn);
@@ -2370,7 +2420,8 @@  int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(slot, gfn, data, offset, len,
+				      kvm->mem_protected);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -2379,7 +2430,8 @@  int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(slot, gfn, data, offset, len,
+				      vcpu->kvm->mem_protected);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -2495,7 +2547,10 @@  int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	if (unlikely(!ghc->memslot))
 		return kvm_write_guest(kvm, gpa, data, len);
 
-	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
+	if (kvm->mem_protected)
+		r = copy_to_guest(ghc->hva + offset, data, len);
+	else
+		r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
 	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
@@ -2530,7 +2585,10 @@  int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	if (unlikely(!ghc->memslot))
 		return kvm_read_guest(kvm, ghc->gpa, data, len);
 
-	r = __copy_from_user(data, (void __user *)ghc->hva, len);
+	if (kvm->mem_protected)
+		r = copy_from_guest(data, ghc->hva, len);
+	else
+		r = __copy_from_user(data, (void __user *)ghc->hva, len);
 	if (r)
 		return -EFAULT;