Message ID | 20210429211833.3361994-8-bgardon@google.com
---|---
State | New, archived
Series | Lazily allocate memslot rmaps
On 29/04/21 23:18, Ben Gardon wrote:
> +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)

This can be static, can't it?

Paolo

> +{
> +	struct kvm_memory_slot *slot;
> +	int r = 0;
> +
> +	kvm_for_each_memslot(slot, slots) {
> +		r = alloc_memslot_rmap(kvm, slot, slot->npages);
> +		if (r)
> +			break;
> +	}
> +	return r;
> +}
> +
On Mon, May 3, 2021 at 6:42 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 29/04/21 23:18, Ben Gardon wrote:
> > +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
>
> This can be static, can't it?

Ah, yes. Absolutely.

>
> Paolo
>
> > +{
> > +	struct kvm_memory_slot *slot;
> > +	int r = 0;
> > +
> > +	kvm_for_each_memslot(slot, slots) {
> > +		r = alloc_memslot_rmap(kvm, slot, slot->npages);
> > +		if (r)
> > +			break;
> > +	}
> > +	return r;
> > +}
> > +
>
On Thu, Apr 29, 2021, Ben Gardon wrote:
> If the TDP MMU is in use, wait to allocate the rmaps until the shadow
> MMU is actually used. (i.e. a nested VM is launched.) This saves memory
> equal to 0.2% of guest memory in cases where the TDP MMU is used and
> there are no nested guests involved.
>
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 11 +++++++
>  arch/x86/kvm/mmu/mmu.c          | 21 +++++++++++--
>  arch/x86/kvm/mmu/mmu_internal.h |  2 +-
>  arch/x86/kvm/x86.c              | 54 ++++++++++++++++++++++++++++++---
>  4 files changed, 80 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3900dcf2439e..b8633ed00a6a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1124,6 +1124,15 @@ struct kvm_arch {
>  #endif /* CONFIG_X86_64 */
>
>  	bool shadow_mmu_active;
> +
> +	/*
> +	 * If set, the rmap should be allocated for any newly created or
> +	 * modified memslots. If allocating rmaps lazily, this may be set
> +	 * before the rmaps are allocated for existing memslots, but
> +	 * shadow_mmu_active will not be set until after the rmaps are fully
> +	 * allocated.
> +	 */
> +	bool alloc_memslot_rmaps;

Maybe "need_rmaps" or "need_memslot_rmaps"?

>  };
>
>  struct kvm_vm_stat {
> @@ -1855,4 +1864,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
>
>  int kvm_cpu_dirty_log_size(void);
>
> +int alloc_all_memslots_rmaps(struct kvm *kvm);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e252af46f205..b2a6585bd978 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3125,9 +3125,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  	return ret;
>  }
>
> -void activate_shadow_mmu(struct kvm *kvm)
> +int activate_shadow_mmu(struct kvm *kvm)
>  {
> +	int r;
> +
> +	r = alloc_all_memslots_rmaps(kvm);
> +	if (r)
> +		return r;
> +
>  	kvm->arch.shadow_mmu_active = true;

If shadow_mmu_active goes away, so does this helper.

> +
> +	return 0;
>  }
>
>  static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
> @@ -3300,7 +3308,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>  		}
>  	}
>
> -	activate_shadow_mmu(vcpu->kvm);
> +	r = activate_shadow_mmu(vcpu->kvm);
> +	if (r)
> +		return r;
>
>  	write_lock(&vcpu->kvm->mmu_lock);
>  	r = make_mmu_pages_available(vcpu);
> @@ -5491,7 +5501,12 @@ void kvm_mmu_init_vm(struct kvm *kvm)
>  	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
>
>  	if (!kvm_mmu_init_tdp_mmu(kvm))
> -		activate_shadow_mmu(kvm);
> +		/*
> +		 * No memslots can have been allocated at this point.
> +		 * activate_shadow_mmu won't actually need to allocate
> +		 * rmaps, so it cannot fail.
> +		 */
> +		WARN_ON(activate_shadow_mmu(kvm));

This is where I really don't like calling the full flow. VM init is already
special, I don't see any harm in open coding the setting of the flag. This
also provides a good place to document that the smp_store/load business is
unnecessary since there can't be users.
>  	node->track_write = kvm_mmu_pte_write;
>  	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
> -static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
> +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
> +{
> +	struct kvm_memory_slot *slot;
> +	int r = 0;
> +
> +	kvm_for_each_memslot(slot, slots) {
> +		r = alloc_memslot_rmap(kvm, slot, slot->npages);
> +		if (r)
> +			break;
> +	}
> +	return r;
> +}

Just open code this in the caller, it's literally one line of code and the
indentation isn't bad.

> +
> +int alloc_all_memslots_rmaps(struct kvm *kvm)
> +{
> +	struct kvm_memslots *slots;
> +	int r = 0;
> +	int i;
> +
> +	mutex_lock(&kvm->slots_arch_lock);
> +	kvm->arch.alloc_memslot_rmaps = true;
> +
> +	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> +		slots = __kvm_memslots(kvm, i);
> +		r = alloc_memslots_rmaps(kvm, slots);
> +		if (r)

It'd be easier just to destroy the rmaps on failure and then do:

	if (kvm->arch.needs_memslots_rmaps)
		return;

	mutex_lock(&kvm->slots_arch_lock);
	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
		kvm_for_each_memslot(slot, __kvm_memslots(kvm, i)) {
			r = alloc_memslot_rmap(kvm, slot, slot->npages);
			if (r)
				break;
		}
	}

	if (!r)
		smp_store_release(&kvm->arch.needs_memslots_rmaps, true);
	else
		kvm_free_rmaps(kvm);
	mutex_unlock(&kvm->slots_arch_lock);

and make alloc_memslot_rmap() a pure allocator (no checks on whether it
should actually do allocations), i.e. push the check to the memslot flow:

static int kvm_alloc_memslot_metadata(struct kvm *kvm,
				      struct kvm_memory_slot *slot,
				      unsigned long npages)
{
	int i;
	int r;

	/*
	 * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
	 * old arrays will be freed by __kvm_set_memory_region() if installing
	 * the new memslot is successful.
	 */
	memset(&slot->arch, 0, sizeof(slot->arch));

	if (kvm->arch.needs_memslots_rmaps) {
		r = alloc_memslot_rmap(kvm, slot, npages);
		if (r)
			return r;
	}

With that, there's no need for the separate shadow_mmu_active flag, and you
can do s/activate_shadow_mmu/kvm_activate_rmaps or so.

> +			break;
> +	}
> +	mutex_unlock(&kvm->slots_arch_lock);
> +	return r;
> +}
> +
> +static int kvm_alloc_memslot_metadata(struct kvm *kvm,
> +				      struct kvm_memory_slot *slot,
>  				      unsigned long npages)
>  {
>  	int i;
> @@ -10881,7 +10927,7 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
>  	 */
>  	memset(&slot->arch, 0, sizeof(slot->arch));
>
> -	r = alloc_memslot_rmap(slot, npages);
> +	r = alloc_memslot_rmap(kvm, slot, npages);
>  	if (r)
>  		return r;
>
> @@ -10954,7 +11000,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  				   enum kvm_mr_change change)
>  {
>  	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
> -		return kvm_alloc_memslot_metadata(memslot,
> +		return kvm_alloc_memslot_metadata(kvm, memslot,
>  						  mem->memory_size >> PAGE_SHIFT);
>  	return 0;
>  }
> --
> 2.31.1.527.g47e6f16901-goog
>
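The smp_store/load pairing referenced above amounts to something like the
sketch below, using the "memslots_have_rmaps" name the thread settles on
later (the helper names are illustrative, not taken from the patch). The
release orders all of the rmap stores before the flag becomes visible; the
matching acquire orders the flag check before any rmap dereference.

	/*
	 * Writer side, called with slots_arch_lock held once every
	 * memslot's rmaps have been allocated.  The release ensures the
	 * rmap pointer stores are visible before any reader can observe
	 * the flag as true.
	 */
	static void kvm_activate_rmaps(struct kvm *kvm)
	{
		/* Pairs with smp_load_acquire() in kvm_memslots_have_rmaps(). */
		smp_store_release(&kvm->arch.memslots_have_rmaps, true);
	}

	/*
	 * Reader side, e.g. an MMU path deciding whether rmaps exist yet.
	 * If this returns true, the rmaps written before the release
	 * above are guaranteed to be visible as well.
	 */
	static bool kvm_memslots_have_rmaps(struct kvm *kvm)
	{
		return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
	}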
On 04/05/21 22:13, Sean Christopherson wrote:
>> +	/*
>> +	 * If set, the rmap should be allocated for any newly created or
>> +	 * modified memslots. If allocating rmaps lazily, this may be set
>> +	 * before the rmaps are allocated for existing memslots, but
>> +	 * shadow_mmu_active will not be set until after the rmaps are fully
>> +	 * allocated.
>> +	 */
>> +	bool alloc_memslot_rmaps;
>
> Maybe "need_rmaps" or "need_memslot_rmaps"?

Since we're bikeshedding I prefer "memslots_have_rmaps" or something not
too distant from that.

Paolo
On 29/04/21 23:18, Ben Gardon wrote:
> +	/*
> +	 * If set, the rmap should be allocated for any newly created or
> +	 * modified memslots. If allocating rmaps lazily, this may be set
> +	 * before the rmaps are allocated for existing memslots, but
> +	 * shadow_mmu_active will not be set until after the rmaps are fully
> +	 * allocated.
> +	 */
> +	bool alloc_memslot_rmaps;

Let's remove the whole sentence starting with "If allocating rmaps lazily".
The part about shadow_mmu_active should go there, while the rest is pointless
as long as we just say that this flag will be accessed only under
slots_arch_lock.

(Regarding shadow_mmu_active, I think I know what Sean will be suggesting
because I had a similar thought and decided it introduced extra unnecessary
complication... but maybe not, so let's see what he says).

Paolo
On Tue, May 04, 2021, Paolo Bonzini wrote:
> On 04/05/21 22:13, Sean Christopherson wrote:
> > > +	/*
> > > +	 * If set, the rmap should be allocated for any newly created or
> > > +	 * modified memslots. If allocating rmaps lazily, this may be set
> > > +	 * before the rmaps are allocated for existing memslots, but
> > > +	 * shadow_mmu_active will not be set until after the rmaps are fully
> > > +	 * allocated.
> > > +	 */
> > > +	bool alloc_memslot_rmaps;
> > Maybe "need_rmaps" or "need_memslot_rmaps"?
>
> Since we're bikeshedding I prefer "memslots_have_rmaps" or something not too
> distant from that.

Works for me.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3900dcf2439e..b8633ed00a6a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,6 +1124,15 @@ struct kvm_arch {
 #endif /* CONFIG_X86_64 */
 
 	bool shadow_mmu_active;
+
+	/*
+	 * If set, the rmap should be allocated for any newly created or
+	 * modified memslots. If allocating rmaps lazily, this may be set
+	 * before the rmaps are allocated for existing memslots, but
+	 * shadow_mmu_active will not be set until after the rmaps are fully
+	 * allocated.
+	 */
+	bool alloc_memslot_rmaps;
 };
 
 struct kvm_vm_stat {
@@ -1855,4 +1864,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e252af46f205..b2a6585bd978 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3125,9 +3125,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	return ret;
 }
 
-void activate_shadow_mmu(struct kvm *kvm)
+int activate_shadow_mmu(struct kvm *kvm)
 {
+	int r;
+
+	r = alloc_all_memslots_rmaps(kvm);
+	if (r)
+		return r;
+
 	kvm->arch.shadow_mmu_active = true;
+
+	return 0;
 }
 
 static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -3300,7 +3308,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	activate_shadow_mmu(vcpu->kvm);
+	r = activate_shadow_mmu(vcpu->kvm);
+	if (r)
+		return r;
 
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
@@ -5491,7 +5501,12 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
 	if (!kvm_mmu_init_tdp_mmu(kvm))
-		activate_shadow_mmu(kvm);
+		/*
+		 * No memslots can have been allocated at this point.
+		 * activate_shadow_mmu won't actually need to allocate
+		 * rmaps, so it cannot fail.
+		 */
+		WARN_ON(activate_shadow_mmu(kvm));
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 297a911c018c..c6b21a916452 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -165,6 +165,6 @@ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
-void activate_shadow_mmu(struct kvm *kvm);
+int activate_shadow_mmu(struct kvm *kvm);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc32a7dbe4c4..c72b35cbaef7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10842,11 +10842,24 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	kvm_page_track_free_memslot(slot);
 }
 
-static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
+static int alloc_memslot_rmap(struct kvm *kvm, struct kvm_memory_slot *slot,
 			      unsigned long npages)
 {
 	int i;
 
+	if (!kvm->arch.alloc_memslot_rmaps)
+		return 0;
+
+	/*
+	 * All rmaps for a memslot should be allocated either before
+	 * the memslot is installed (in which case no other threads
+	 * should have a pointer to it), or under the
+	 * slots_arch_lock. Avoid overwriting already allocated
+	 * rmaps.
+	 */
+	if (slot->arch.rmap[0])
+		return 0;
+
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		int lpages;
 		int level = i + 1;
@@ -10868,7 +10881,40 @@ static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
 	return -ENOMEM;
 }
 
-static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
+int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
+{
+	struct kvm_memory_slot *slot;
+	int r = 0;
+
+	kvm_for_each_memslot(slot, slots) {
+		r = alloc_memslot_rmap(kvm, slot, slot->npages);
+		if (r)
+			break;
+	}
+	return r;
+}
+
+int alloc_all_memslots_rmaps(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	int r = 0;
+	int i;
+
+	mutex_lock(&kvm->slots_arch_lock);
+	kvm->arch.alloc_memslot_rmaps = true;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		r = alloc_memslots_rmaps(kvm, slots);
+		if (r)
+			break;
+	}
+	mutex_unlock(&kvm->slots_arch_lock);
+	return r;
+}
+
+static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages)
 {
 	int i;
@@ -10881,7 +10927,7 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	r = alloc_memslot_rmap(slot, npages);
+	r = alloc_memslot_rmap(kvm, slot, npages);
 	if (r)
 		return r;
 
@@ -10954,7 +11000,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   enum kvm_mr_change change)
 {
 	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
-		return kvm_alloc_memslot_metadata(memslot,
+		return kvm_alloc_memslot_metadata(kvm, memslot,
 						  mem->memory_size >> PAGE_SHIFT);
 	return 0;
 }
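The "allocated either before the memslot is installed ... or under the
slots_arch_lock" comment above is the heart of the scheme: because both
memslot installation and the lazy allocation pass serialize on
slots_arch_lock, only two interleavings are possible. An illustrative
timeline (a sketch, not code from the patch):

	/*
	 * Case 1: the memslot is installed before the first nested launch.
	 *
	 *   installer                        first nested launch
	 *   ---------                        -------------------
	 *   lock(slots_arch_lock)
	 *   alloc_memslot_rmaps == false,
	 *   so no rmaps are allocated
	 *   unlock(slots_arch_lock)
	 *                                    lock(slots_arch_lock)
	 *                                    alloc_memslot_rmaps = true
	 *                                    walk all slots, allocate rmaps
	 *                                    unlock(slots_arch_lock)
	 *
	 * Case 2: the flag is set first.
	 *
	 *                                    lock, set flag, walk, unlock
	 *   lock(slots_arch_lock)
	 *   alloc_memslot_rmaps == true,
	 *   so alloc_memslot_rmap() allocates
	 *   unlock(slots_arch_lock)
	 *
	 * Either way every installed memslot has rmaps before
	 * shadow_mmu_active is set and the shadow MMU touches them.
	 */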
If the TDP MMU is in use, wait to allocate the rmaps until the shadow
MMU is actually used. (i.e. a nested VM is launched.) This saves memory
equal to 0.2% of guest memory in cases where the TDP MMU is used and
there are no nested guests involved.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++++++
 arch/x86/kvm/mmu/mmu.c          | 21 +++++++++++--
 arch/x86/kvm/mmu/mmu_internal.h |  2 +-
 arch/x86/kvm/x86.c              | 54 ++++++++++++++++++++++++++++++---
 4 files changed, 80 insertions(+), 8 deletions(-)
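As a back-of-the-envelope check on the 0.2% figure (not part of the patch):
the rmap cost is one struct kvm_rmap_head, i.e. one 8-byte unsigned long on
x86-64, per 4 KiB page at the lowest level, plus one entry per 512 and per
512^2 pages for the 2M and 1G levels:

	8/4096 * (1 + 1/512 + 1/512^2) ~= 0.196% of guest memory

or roughly 2 MiB of rmap arrays per GiB of guest memory that a VM using
only the TDP MMU no longer pays for up front.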