kvm: mmu: Fix overflow on kvm mmu page limit calculation

Message ID 20190405234232.174196-1-bgardon@google.com (mailing list archive)
State New, archived
Series kvm: mmu: Fix overflow on kvm mmu page limit calculation

Commit Message

Ben Gardon April 5, 2019, 11:42 p.m. UTC
KVM bases its memory usage limits on the total number of guest pages
across all memslots, however those limits and the calculations to
produce them use 32 bit unsigned integers. This can result in overflow
If a VM has too many pages. As a result of this overflow, KVM uses a
very low limit on the number of MMU pages it can allocate, and so
cannot map all of guest memory at once.
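
A minimal user-space illustration of the wrap (not part of the patch; assumes
4 KiB pages and KVM_PERMILLE_MMU_PAGES == 20):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* A 1 TiB guest has 1 TiB / 4 KiB = 268435456 pages. */
		uint32_t nr_pages = (1ULL << 40) / 4096;

		/*
		 * Mirrors the old u32 calculation
		 * nr_pages * KVM_PERMILLE_MMU_PAGES / 1000, which wraps once
		 * the guest has more than roughly 819 GiB of memory.
		 */
		uint32_t wrapped = nr_pages * 20 / 1000;
		uint64_t correct = (uint64_t)nr_pages * 20 / 1000;

		/* Prints "wrapped: 1073741, correct: 5368709". */
		printf("wrapped: %u, correct: %llu\n",
		       wrapped, (unsigned long long)correct);
		return 0;
	}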

Tested: Ran all kvm-unit-tests on an Intel Haswell machine. This patch
	introduced no new failures.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/include/asm/kvm_host.h | 10 +++++-----
 arch/x86/kvm/mmu.c              | 13 ++++++-------
 arch/x86/kvm/mmu.h              |  2 +-
 arch/x86/kvm/x86.c              |  2 +-
 4 files changed, 13 insertions(+), 14 deletions(-)

Comments

Sean Christopherson April 8, 2019, 2:58 p.m. UTC | #1
On Fri, Apr 05, 2019 at 04:42:32PM -0700, Ben Gardon wrote:
> KVM bases its memory usage limits on the total number of guest pages
> across all memslots, however those limits and the calculations to
> produce them use 32 bit unsigned integers. This can result in overflow
> If a VM has too many pages. As a result of this overflow, KVM uses a

'If' doesn't need to be capitalized.  And it'd be helpful to call out in
the changelog that the number of pages in the memslot is stored as an
unsigned long, as opposed to "a VM has too many pages", which kind of
implies userspace misconfigured the VM.  E.g.:
  
  This results in overflow if the total number of pages across all
  memslots exceeds the capacity of a u32.

> very low limit on the number of MMU pages it can allocate, and so
> cannot map all of guest memory at once.
> 
> Tested: Ran all kvm-unit-tests on an Intel Haswell machine. This patch
> 	introduced no new failures.
> 
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 10 +++++-----
>  arch/x86/kvm/mmu.c              | 13 ++++++-------
>  arch/x86/kvm/mmu.h              |  2 +-
>  arch/x86/kvm/x86.c              |  2 +-
>  4 files changed, 13 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 159b5988292f3..ba7d0b46eb46e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -844,9 +844,9 @@ enum kvm_irqchip_mode {
>  };
>  
>  struct kvm_arch {
> -	unsigned int n_used_mmu_pages;
> -	unsigned int n_requested_mmu_pages;
> -	unsigned int n_max_mmu_pages;
> +	long n_used_mmu_pages;
> +	long n_requested_mmu_pages;
> +	long n_max_mmu_pages;

This should be 'unsigned long' to match kvm_memory_slot.npages, and
because replacing 'u32' with 'long' technically reduces the number of
pages a guest can have before encountering the wrap on 32-bit KVM,
though I highly doubt anyone cares about that scenario :-)
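
For 4 KiB pages the wrap points work out to roughly (back-of-the-envelope,
assuming long is 32 bits on 32-bit KVM and 64 bits on 64-bit KVM):

  u32:                   2^32 pages * 4 KiB = 16 TiB of guest memory
  signed long (32-bit):  2^31 pages * 4 KiB =  8 TiB before the count goes negative
  unsigned long:         same as u32 on 32-bit KVM, effectively unbounded on 64-bit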

>  	unsigned int indirect_shadow_pages;
>  	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
>  	/*
> @@ -1256,8 +1256,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
>  				   gfn_t gfn_offset, unsigned long mask);
>  void kvm_mmu_zap_all(struct kvm *kvm);
>  void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
> -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
> -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
> +long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
> +void kvm_mmu_change_mmu_pages(struct kvm *kvm, long kvm_nr_mmu_pages);
>  
>  int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
>  bool pdptrs_changed(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index eee455a8a612d..0762616bbe301 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt)
>   * aggregate version in order to make the slab shrinker
>   * faster
>   */
> -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
> +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
>  {
>  	kvm->arch.n_used_mmu_pages += nr;
>  	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
> @@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
>   * Changing the number of mmu pages allocated to the vm
>   * Note: if goal_nr_mmu_pages is too small, you will get dead lock
>   */
> -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
> +void kvm_mmu_change_mmu_pages(struct kvm *kvm, long goal_nr_mmu_pages)
>  {
>  	LIST_HEAD(invalid_list);
>  
> @@ -6031,10 +6031,10 @@ int kvm_mmu_module_init(void)
>  /*
>   * Calculate mmu pages needed for kvm.
>   */
> -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
> +long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
>  {
> -	unsigned int nr_mmu_pages;
> -	unsigned int  nr_pages = 0;
> +	long nr_mmu_pages;
> +	long  nr_pages = 0;

Might as well remove that extra whitespace while you're at it.

>  	struct kvm_memslots *slots;
>  	struct kvm_memory_slot *memslot;
>  	int i;
> @@ -6047,8 +6047,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
>  	}
>  
>  	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
> -	nr_mmu_pages = max(nr_mmu_pages,
> -			   (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
> +	nr_mmu_pages = max_t(long, nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);

In the next version, with nr_mmu_pages becoming an 'unsigned long', it
probably makes sense to declare KVM_MIN_ALLOC_MMU_PAGES as 64UL so as to
avoid this cast (and I assume the check in kvm_vm_ioctl_set_nr_mmu_pages()
will also complain about signed vs. unsigned).
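
A sketch of that follow-up (illustrative only, untested; assumes the define is
currently a plain 64):

  /* arch/x86/include/asm/kvm_host.h */
  #define KVM_MIN_ALLOC_MMU_PAGES 64UL

  /* arch/x86/kvm/mmu.c, once nr_mmu_pages is an unsigned long */
  nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);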

>  
>  	return nr_mmu_pages;
>  }
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index bbdc60f2fae89..e870f1027fa82 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
>  int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>  				u64 fault_address, char *insn, int insn_len);
>  
> -static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
> +static inline long kvm_mmu_available_pages(struct kvm *kvm)
>  {
>  	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
>  		return kvm->arch.n_max_mmu_pages -
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 099b851dabafd..1d508de3d00d8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4270,7 +4270,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
>  }
>  
>  static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
> -					  u32 kvm_nr_mmu_pages)
> +					 long kvm_nr_mmu_pages)
>  {
>  	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
>  		return -EINVAL;
> -- 
> 2.21.0.392.gf8f6787159e-goog
>
Ben Gardon April 8, 2019, 4:27 p.m. UTC | #2
Thank you for your feedback, Sean. I'll incorporate it and send out a
v2 of this patch.

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 159b5988292f3..ba7d0b46eb46e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -844,9 +844,9 @@  enum kvm_irqchip_mode {
 };
 
 struct kvm_arch {
-	unsigned int n_used_mmu_pages;
-	unsigned int n_requested_mmu_pages;
-	unsigned int n_max_mmu_pages;
+	long n_used_mmu_pages;
+	long n_requested_mmu_pages;
+	long n_max_mmu_pages;
 	unsigned int indirect_shadow_pages;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
@@ -1256,8 +1256,8 @@  void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				   gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
+long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, long kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
 bool pdptrs_changed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eee455a8a612d..0762616bbe301 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2007,7 +2007,7 @@  static int is_empty_shadow_page(u64 *spt)
  * aggregate version in order to make the slab shrinker
  * faster
  */
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
 {
 	kvm->arch.n_used_mmu_pages += nr;
 	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
@@ -2763,7 +2763,7 @@  static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
  * Changing the number of mmu pages allocated to the vm
  * Note: if goal_nr_mmu_pages is too small, you will get dead lock
  */
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, long goal_nr_mmu_pages)
 {
 	LIST_HEAD(invalid_list);
 
@@ -6031,10 +6031,10 @@  int kvm_mmu_module_init(void)
 /*
  * Calculate mmu pages needed for kvm.
  */
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
+long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
 {
-	unsigned int nr_mmu_pages;
-	unsigned int  nr_pages = 0;
+	long nr_mmu_pages;
+	long  nr_pages = 0;
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 	int i;
@@ -6047,8 +6047,7 @@  unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
 	}
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
-	nr_mmu_pages = max(nr_mmu_pages,
-			   (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+	nr_mmu_pages = max_t(long, nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
 
 	return nr_mmu_pages;
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index bbdc60f2fae89..e870f1027fa82 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -64,7 +64,7 @@  bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
 int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 				u64 fault_address, char *insn, int insn_len);
 
-static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+static inline long kvm_mmu_available_pages(struct kvm *kvm)
 {
 	if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
 		return kvm->arch.n_max_mmu_pages -
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 099b851dabafd..1d508de3d00d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4270,7 +4270,7 @@  static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
 }
 
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
-					  u32 kvm_nr_mmu_pages)
+					 long kvm_nr_mmu_pages)
 {
 	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
 		return -EINVAL;