diff mbox series

[v2,7/7] KVM: x86/mmu: Lazily allocate memslot rmaps

Message ID 20210429211833.3361994-8-bgardon@google.com (mailing list archive)
State New, archived
Headers show
Series Lazily allocate memslot rmaps | expand

Commit Message

Ben Gardon April 29, 2021, 9:18 p.m. UTC
If the TDP MMU is in use, wait to allocate the rmaps until the shadow
MMU is actually used. (i.e. a nested VM is launched.) This saves memory
equal to 0.2% of guest memory in cases where the TDP MMU is used and
there are no nested guests involved.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++++++
 arch/x86/kvm/mmu/mmu.c          | 21 +++++++++++--
 arch/x86/kvm/mmu/mmu_internal.h |  2 +-
 arch/x86/kvm/x86.c              | 54 ++++++++++++++++++++++++++++++---
 4 files changed, 80 insertions(+), 8 deletions(-)

Comments

Paolo Bonzini May 3, 2021, 1:42 p.m. UTC | #1
On 29/04/21 23:18, Ben Gardon wrote:
> +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)

This can be static, can't it?

Paolo

> +{
> +	struct kvm_memory_slot *slot;
> +	int r = 0;
> +
> +	kvm_for_each_memslot(slot, slots) {
> +		r = alloc_memslot_rmap(kvm, slot, slot->npages);
> +		if (r)
> +			break;
> +	}
> +	return r;
> +}
> +
Ben Gardon May 3, 2021, 5:29 p.m. UTC | #2
On Mon, May 3, 2021 at 6:42 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 29/04/21 23:18, Ben Gardon wrote:
> > +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
>
> This can be static, can't it?

Ah, yes. Absolutely.

>
> Paolo
>
> > +{
> > +     struct kvm_memory_slot *slot;
> > +     int r = 0;
> > +
> > +     kvm_for_each_memslot(slot, slots) {
> > +             r = alloc_memslot_rmap(kvm, slot, slot->npages);
> > +             if (r)
> > +                     break;
> > +     }
> > +     return r;
> > +}
> > +
>
Sean Christopherson May 4, 2021, 8:13 p.m. UTC | #3
On Thu, Apr 29, 2021, Ben Gardon wrote:
> If the TDP MMU is in use, wait to allocate the rmaps until the shadow
> MMU is actually used. (i.e. a nested VM is launched.) This saves memory
> equal to 0.2% of guest memory in cases where the TDP MMU is used and
> there are no nested guests involved.
> 
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 11 +++++++
>  arch/x86/kvm/mmu/mmu.c          | 21 +++++++++++--
>  arch/x86/kvm/mmu/mmu_internal.h |  2 +-
>  arch/x86/kvm/x86.c              | 54 ++++++++++++++++++++++++++++++---
>  4 files changed, 80 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3900dcf2439e..b8633ed00a6a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1124,6 +1124,15 @@ struct kvm_arch {
>  #endif /* CONFIG_X86_64 */
>  
>  	bool shadow_mmu_active;
> +
> +	/*
> +	 * If set, the rmap should be allocated for any newly created or
> +	 * modified memslots. If allocating rmaps lazily, this may be set
> +	 * before the rmaps are allocated for existing memslots, but
> +	 * shadow_mmu_active will not be set until after the rmaps are fully
> +	 * allocated.
> +	 */
> +	bool alloc_memslot_rmaps;

Maybe "need_rmaps" or "need_memslot_rmaps"?

>  };
>  
>  struct kvm_vm_stat {
> @@ -1855,4 +1864,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
>  
>  int kvm_cpu_dirty_log_size(void);
>  
> +int alloc_all_memslots_rmaps(struct kvm *kvm);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e252af46f205..b2a6585bd978 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3125,9 +3125,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  	return ret;
>  }
>  
> -void activate_shadow_mmu(struct kvm *kvm)
> +int activate_shadow_mmu(struct kvm *kvm)
>  {
> +	int r;
> +
> +	r = alloc_all_memslots_rmaps(kvm);
> +	if (r)
> +		return r;
> +
>  kvm->arch.shadow_mmu_active = true;

If shadow_mmu_active goes away, so does this helper.

> +
> +	return 0;
>  }
>  
>  static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
> @@ -3300,7 +3308,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>  		}
>  	}
>  
> -	activate_shadow_mmu(vcpu->kvm);
> +	r = activate_shadow_mmu(vcpu->kvm);
> +	if (r)
> +		return r;
>  
>  	write_lock(&vcpu->kvm->mmu_lock);
>  	r = make_mmu_pages_available(vcpu);
> @@ -5491,7 +5501,12 @@ void kvm_mmu_init_vm(struct kvm *kvm)
>  	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
>  
>  	if (!kvm_mmu_init_tdp_mmu(kvm))
> -		activate_shadow_mmu(kvm);
> +		/*
> +		 * No memslots can have been allocated at this point.
> +		 * activate_shadow_mmu won't actually need to allocate
> +		 * rmaps, so it cannot fail.
> +		 */
> +		WARN_ON(activate_shadow_mmu(kvm));

This is where I really don't like calling the full flow.  VM init is already
special, I don't see any harm in open coding the setting of the flag.  This also
provides a good place to document that the smp_store/load business is unnecessary
since there can't be users.

>  	node->track_write = kvm_mmu_pte_write;
>  	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
> -static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
> +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
> +{
> +	struct kvm_memory_slot *slot;
> +	int r = 0;
> +
> +	kvm_for_each_memslot(slot, slots) {
> +		r = alloc_memslot_rmap(kvm, slot, slot->npages);
> +		if (r)
> +			break;
> +	}
> +	return r;
> +}

Just open code this in the caller, it's literally one line of code and the
indentation isn't bad.

> +
> +int alloc_all_memslots_rmaps(struct kvm *kvm)
> +{
> +	struct kvm_memslots *slots;
> +	int r = 0;
> +	int i;
> +
> +	mutex_lock(&kvm->slots_arch_lock);
> +	kvm->arch.alloc_memslot_rmaps = true;
> +
> +	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> +		slots = __kvm_memslots(kvm, i);
> +		r = alloc_memslots_rmaps(kvm, slots);
> +		if (r)

It'd be easier just to destroy the rmaps on failure and then do:

	if (kvm->arch.needs_memslots_rmaps)
		return;

	mutex_lock(&kvm->slots_arch_lock);

	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
		kvm_for_each_memslot(slot, __kvm_memslots(kvm, i)) {
			r = alloc_memslot_rmap(kvm, slot, slot->npages);
				break;
		}
	}

	if (!r)
		smp_store_release(kvm->arch.needs_memslots_rmaps, true);
	else
		kvm_free_rmaps(kvm);
	mutex_unlock(&kvm->slots_arch_lock);


and make alloc_memslot_rmap() a pure allocator (no checks on whether it should
actually do allocations), i.e. push the check to the memslot flow:

static int kvm_alloc_memslot_metadata(struct kvm *kvm,
				      struct kvm_memory_slot *slot,
				      unsigned long npages)
{
	int i;
	int r;

	/*
	 * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
	 * old arrays will be freed by __kvm_set_memory_region() if installing
	 * the new memslot is successful.
	 */
	memset(&slot->arch, 0, sizeof(slot->arch));

	if (kvm->arch.needs_memslots_rmaps) {
		r = alloc_memslot_rmap(kvm, slot, npages);
		if (r)
			return r;
	}


With that, there's no need for the separate shadow_mmu_active flag, and you can
do s/activate_shadow_mmu/kvm_activate_rmaps or so.


> +			break;
> +	}
> +	mutex_unlock(&kvm->slots_arch_lock);
> +	return r;
> +}
> +
> +static int kvm_alloc_memslot_metadata(struct kvm *kvm,
> +				      struct kvm_memory_slot *slot,
>  				      unsigned long npages)
>  {
>  	int i;
> @@ -10881,7 +10927,7 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
>  	 */
>  	memset(&slot->arch, 0, sizeof(slot->arch));
>  
> -	r = alloc_memslot_rmap(slot, npages);
> +	r = alloc_memslot_rmap(kvm, slot, npages);
>  	if (r)
>  		return r;
>  
> @@ -10954,7 +11000,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  				enum kvm_mr_change change)
>  {
>  	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
> -		return kvm_alloc_memslot_metadata(memslot,
> +		return kvm_alloc_memslot_metadata(kvm, memslot,
>  						  mem->memory_size >> PAGE_SHIFT);
>  	return 0;
>  }
> -- 
> 2.31.1.527.g47e6f16901-goog
>
Paolo Bonzini May 4, 2021, 8:19 p.m. UTC | #4
On 04/05/21 22:13, Sean Christopherson wrote:
>> +	/*
>> +	 * If set, the rmap should be allocated for any newly created or
>> +	 * modified memslots. If allocating rmaps lazily, this may be set
>> +	 * before the rmaps are allocated for existing memslots, but
>> +	 * shadow_mmu_active will not be set until after the rmaps are fully
>> +	 * allocated.
>> +	 */
>> +	bool alloc_memslot_rmaps;
> Maybe "need_rmaps" or "need_memslot_rmaps"?
> 

Since we're bikeshedding I prefer "memslots_have_rmaps" or something not 
too distant from that.

Paolo
Paolo Bonzini May 4, 2021, 8:22 p.m. UTC | #5
On 29/04/21 23:18, Ben Gardon wrote:
> +	/*
> +	 * If set, the rmap should be allocated for any newly created or
> +	 * modified memslots. If allocating rmaps lazily, this may be set
> +	 * before the rmaps are allocated for existing memslots, but
> +	 * shadow_mmu_active will not be set until after the rmaps are fully
> +	 * allocated.
> +	 */
> +	bool alloc_memslot_rmaps;

Let's remove the whole sentence starting with "If allocating rmaps 
lazily".  The part about shadow_mmu_active should go there, while the 
rest is pointless as long as we just say that this flag will be accessed 
only under slots_arch_lock.

(Regarding shadow_mmu_active, I think I know what Sean will be 
suggesting because I had a similar thought and decided it introduced 
extra unnecessary complication... but maybe not, so let's see what he says).

Paolo
Sean Christopherson May 4, 2021, 8:34 p.m. UTC | #6
On Tue, May 04, 2021, Paolo Bonzini wrote:
> On 04/05/21 22:13, Sean Christopherson wrote:
> > > +	/*
> > > +	 * If set, the rmap should be allocated for any newly created or
> > > +	 * modified memslots. If allocating rmaps lazily, this may be set
> > > +	 * before the rmaps are allocated for existing memslots, but
> > > +	 * shadow_mmu_active will not be set until after the rmaps are fully
> > > +	 * allocated.
> > > +	 */
> > > +	bool alloc_memslot_rmaps;
> > Maybe "need_rmaps" or "need_memslot_rmaps"?
> > 
> 
> Since we're bikeshedding I prefer "memslots_have_rmaps" or something not too
> distant from that.

Works for me.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3900dcf2439e..b8633ed00a6a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,6 +1124,15 @@  struct kvm_arch {
 #endif /* CONFIG_X86_64 */
 
 	bool shadow_mmu_active;
+
+	/*
+	 * If set, the rmap should be allocated for any newly created or
+	 * modified memslots. If allocating rmaps lazily, this may be set
+	 * before the rmaps are allocated for existing memslots, but
+	 * shadow_mmu_active will not be set until after the rmaps are fully
+	 * allocated.
+	 */
+	bool alloc_memslot_rmaps;
 };
 
 struct kvm_vm_stat {
@@ -1855,4 +1864,6 @@  static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e252af46f205..b2a6585bd978 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3125,9 +3125,17 @@  static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	return ret;
 }
 
-void activate_shadow_mmu(struct kvm *kvm)
+int activate_shadow_mmu(struct kvm *kvm)
 {
+	int r;
+
+	r = alloc_all_memslots_rmaps(kvm);
+	if (r)
+		return r;
+
 	kvm->arch.shadow_mmu_active = true;
+
+	return 0;
 }
 
 static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -3300,7 +3308,9 @@  static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	activate_shadow_mmu(vcpu->kvm);
+	r = activate_shadow_mmu(vcpu->kvm);
+	if (r)
+		return r;
 
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
@@ -5491,7 +5501,12 @@  void kvm_mmu_init_vm(struct kvm *kvm)
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
 	if (!kvm_mmu_init_tdp_mmu(kvm))
-		activate_shadow_mmu(kvm);
+		/*
+		 * No memslots can have been allocated at this point.
+		 * activate_shadow_mmu won't actually need to allocate
+		 * rmaps, so it cannot fail.
+		 */
+		WARN_ON(activate_shadow_mmu(kvm));
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 297a911c018c..c6b21a916452 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -165,6 +165,6 @@  void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
-void activate_shadow_mmu(struct kvm *kvm);
+int activate_shadow_mmu(struct kvm *kvm);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc32a7dbe4c4..c72b35cbaef7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10842,11 +10842,24 @@  void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	kvm_page_track_free_memslot(slot);
 }
 
-static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
+static int alloc_memslot_rmap(struct kvm *kvm, struct kvm_memory_slot *slot,
 			      unsigned long npages)
 {
 	int i;
 
+	if (!kvm->arch.alloc_memslot_rmaps)
+		return 0;
+
+	/*
+	 * All rmaps for a memslot should be allocated either before
+	 * the memslot is installed (in which case no other threads
+	 * should have a pointer to it), or under the
+	 * slots_arch_lock. Avoid overwriting already allocated
+	 * rmaps.
+	 */
+	if (slot->arch.rmap[0])
+		return 0;
+
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		int lpages;
 		int level = i + 1;
@@ -10868,7 +10881,40 @@  static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
 	return -ENOMEM;
 }
 
-static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
+int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
+{
+	struct kvm_memory_slot *slot;
+	int r = 0;
+
+	kvm_for_each_memslot(slot, slots) {
+		r = alloc_memslot_rmap(kvm, slot, slot->npages);
+		if (r)
+			break;
+	}
+	return r;
+}
+
+int alloc_all_memslots_rmaps(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	int r = 0;
+	int i;
+
+	mutex_lock(&kvm->slots_arch_lock);
+	kvm->arch.alloc_memslot_rmaps = true;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		r = alloc_memslots_rmaps(kvm, slots);
+		if (r)
+			break;
+	}
+	mutex_unlock(&kvm->slots_arch_lock);
+	return r;
+}
+
+static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages)
 {
 	int i;
@@ -10881,7 +10927,7 @@  static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	r = alloc_memslot_rmap(slot, npages);
+	r = alloc_memslot_rmap(kvm, slot, npages);
 	if (r)
 		return r;
 
@@ -10954,7 +11000,7 @@  int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				enum kvm_mr_change change)
 {
 	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
-		return kvm_alloc_memslot_metadata(memslot,
+		return kvm_alloc_memslot_metadata(kvm, memslot,
 						  mem->memory_size >> PAGE_SHIFT);
 	return 0;
 }