Message ID: 20220203010051.2813563-20-dmatlack@google.com (mailing list archive)
State: New, archived
Series: Extend Eager Page Splitting to the shadow MMU
On Thu, 03 Feb 2022 01:00:47 +0000, David Matlack <dmatlack@google.com> wrote: > > Allow the capacity of the kvm_mmu_memory_cache struct to be chosen at > declaration time rather than being fixed for all declarations. This will > be used in a follow-up commit to declare an cache in x86 with a capacity > of 512+ objects without having to increase the capacity of all caches in > KVM. > > No functional change intended. > > Signed-off-by: David Matlack <dmatlack@google.com> > --- > arch/arm64/include/asm/kvm_host.h | 2 +- > arch/arm64/kvm/mmu.c | 12 ++++++------ > arch/mips/include/asm/kvm_host.h | 2 +- > arch/x86/include/asm/kvm_host.h | 8 ++++---- > include/linux/kvm_types.h | 24 ++++++++++++++++++++++-- > virt/kvm/kvm_main.c | 8 +++++++- > 6 files changed, 41 insertions(+), 15 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 3b44ea17af88..a450b91cc2d9 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -357,7 +357,7 @@ struct kvm_vcpu_arch { > bool pause; > > /* Cache some mmu pages needed inside spinlock regions */ > - struct kvm_mmu_memory_cache mmu_page_cache; > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); I must say I'm really not a fan of the anonymous structure trick. I can see why you are doing it that way, but it feels pretty brittle. > > /* Target CPU and feature flags */ > int target; > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index bc2aba953299..9c853c529b49 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -765,7 +765,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > { > phys_addr_t addr; > int ret = 0; > - struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, }; > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; > + struct kvm_mmu_memory_cache *cache = &page_cache.cache; > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | > KVM_PGTABLE_PROT_R | > @@ -774,18 +775,17 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > if (is_protected_kvm_enabled()) > return -EPERM; > > + cache->gfp_zero = __GFP_ZERO; nit: consider this instead, which preserves the existing flow: diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 26d6c53be083..86a7ebd03a44 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -764,7 +764,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, { phys_addr_t addr; int ret = 0; - DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = { + .cache = { .gfp_zero = __GFP_ZERO}, + }; struct kvm_mmu_memory_cache *cache = &page_cache.cache; struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | @@ -774,7 +776,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (is_protected_kvm_enabled()) return -EPERM; - cache->gfp_zero = __GFP_ZERO; size += offset_in_page(guest_ipa); guest_ipa &= PAGE_MASK; but whole "declare the outer structure and just use the inner one" hack is... huh... :-/ This hunk also conflicts with what currently sits in -next. Not a big deal, but just so you know. > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h > index dceac12c1ce5..9575fb8d333f 100644 > --- a/include/linux/kvm_types.h > +++ b/include/linux/kvm_types.h > @@ -78,14 +78,34 @@ struct gfn_to_pfn_cache { > * MMU flows is problematic, as is triggering reclaim, I/O, etc... 
while > * holding MMU locks. Note, these caches act more like prefetch buffers than > * classical caches, i.e. objects are not returned to the cache on being freed. > + * > + * The storage for the cache objects is laid out after the struct to allow > + * different declarations to choose different capacities. If the capacity field > + * is 0, the capacity is assumed to be KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE. > */ > struct kvm_mmu_memory_cache { > int nobjs; > + int capacity; > gfp_t gfp_zero; > struct kmem_cache *kmem_cache; > - void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; > + void *objects[0]; The VLA police is going to track you down ([0] vs []). M.
On Thu, Feb 24, 2022 at 3:29 AM Marc Zyngier <maz@kernel.org> wrote: > > On Thu, 03 Feb 2022 01:00:47 +0000, > David Matlack <dmatlack@google.com> wrote: > > > > Allow the capacity of the kvm_mmu_memory_cache struct to be chosen at > > declaration time rather than being fixed for all declarations. This will > > be used in a follow-up commit to declare an cache in x86 with a capacity > > of 512+ objects without having to increase the capacity of all caches in > > KVM. > > > > No functional change intended. > > > > Signed-off-by: David Matlack <dmatlack@google.com> > > --- > > arch/arm64/include/asm/kvm_host.h | 2 +- > > arch/arm64/kvm/mmu.c | 12 ++++++------ > > arch/mips/include/asm/kvm_host.h | 2 +- > > arch/x86/include/asm/kvm_host.h | 8 ++++---- > > include/linux/kvm_types.h | 24 ++++++++++++++++++++++-- > > virt/kvm/kvm_main.c | 8 +++++++- > > 6 files changed, 41 insertions(+), 15 deletions(-) > > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index 3b44ea17af88..a450b91cc2d9 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -357,7 +357,7 @@ struct kvm_vcpu_arch { > > bool pause; > > > > /* Cache some mmu pages needed inside spinlock regions */ > > - struct kvm_mmu_memory_cache mmu_page_cache; > > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); > > I must say I'm really not a fan of the anonymous structure trick. I > can see why you are doing it that way, but it feels pretty brittle. Yeah I don't love it. It's really optimizing for minimizing the patch diff. The alternative I considered was to dynamically allocate the kvm_mmu_memory_cache structs. This would get rid of the anonymous struct and the objects array, and also eliminate the rather gross capacity hack in kvm_mmu_topup_memory_cache(). The downsides of this approach is more code and more failure paths if the allocation fails. > > > > > /* Target CPU and feature flags */ > > int target; > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > > index bc2aba953299..9c853c529b49 100644 > > --- a/arch/arm64/kvm/mmu.c > > +++ b/arch/arm64/kvm/mmu.c > > @@ -765,7 +765,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > { > > phys_addr_t addr; > > int ret = 0; > > - struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, }; > > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; > > + struct kvm_mmu_memory_cache *cache = &page_cache.cache; > > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | > > KVM_PGTABLE_PROT_R | > > @@ -774,18 +775,17 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > if (is_protected_kvm_enabled()) > > return -EPERM; > > > > + cache->gfp_zero = __GFP_ZERO; > > nit: consider this instead, which preserves the existing flow: Will do. 
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 26d6c53be083..86a7ebd03a44 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -764,7 +764,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > { > phys_addr_t addr; > int ret = 0; > - DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = { > + .cache = { .gfp_zero = __GFP_ZERO}, > + }; > struct kvm_mmu_memory_cache *cache = &page_cache.cache; > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | > @@ -774,7 +776,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > if (is_protected_kvm_enabled()) > return -EPERM; > > - cache->gfp_zero = __GFP_ZERO; > size += offset_in_page(guest_ipa); > guest_ipa &= PAGE_MASK; > > but whole "declare the outer structure and just use the inner one" > hack is... huh... :-/ Yeah it's not great. Unfortunately (or maybe fortunately?) anonymous structs cannot be defined in functions. So naming the outer struct is necessary even though we only need to use the inner one. > > This hunk also conflicts with what currently sits in -next. Not a big > deal, but just so you know. Ack. > > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h > > index dceac12c1ce5..9575fb8d333f 100644 > > --- a/include/linux/kvm_types.h > > +++ b/include/linux/kvm_types.h > > @@ -78,14 +78,34 @@ struct gfn_to_pfn_cache { > > * MMU flows is problematic, as is triggering reclaim, I/O, etc... while > > * holding MMU locks. Note, these caches act more like prefetch buffers than > > * classical caches, i.e. objects are not returned to the cache on being freed. > > + * > > + * The storage for the cache objects is laid out after the struct to allow > > + * different declarations to choose different capacities. If the capacity field > > + * is 0, the capacity is assumed to be KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE. > > */ > > struct kvm_mmu_memory_cache { > > int nobjs; > > + int capacity; > > gfp_t gfp_zero; > > struct kmem_cache *kmem_cache; > > - void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; > > + void *objects[0]; > > The VLA police is going to track you down ([0] vs []). Thanks! > > M. > > -- > Without deviation from the norm, progress is not possible.
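In sketch form, the dynamic-allocation alternative described above might look like this (kvm_mmu_alloc_memory_cache() is a hypothetical helper, not code from the series):

    /* One allocation covers the header and the trailing objects[] storage. */
    struct kvm_mmu_memory_cache *kvm_mmu_alloc_memory_cache(int capacity,
                                                            gfp_t gfp_zero)
    {
        struct kvm_mmu_memory_cache *mc;

        mc = kzalloc(struct_size(mc, objects, capacity), GFP_KERNEL_ACCOUNT);
        if (!mc)
            return NULL;  /* one of the extra failure paths mentioned above */

        mc->capacity = capacity;
        mc->gfp_zero = gfp_zero;
        return mc;
    }

Every embedded cache would then become a pointer plus an allocate/free pair in each architecture's setup and teardown paths, which is the extra complexity being weighed against the anonymous-struct approach.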
On Thu, Feb 24, 2022 at 11:20 AM David Matlack <dmatlack@google.com> wrote: > > On Thu, Feb 24, 2022 at 3:29 AM Marc Zyngier <maz@kernel.org> wrote: > > > > On Thu, 03 Feb 2022 01:00:47 +0000, > > David Matlack <dmatlack@google.com> wrote: > > > [...] > > > > > > /* Cache some mmu pages needed inside spinlock regions */ > > > - struct kvm_mmu_memory_cache mmu_page_cache; > > > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); > > > > I must say I'm really not a fan of the anonymous structure trick. I > > can see why you are doing it that way, but it feels pretty brittle. > > Yeah I don't love it. It's really optimizing for minimizing the patch diff. > > The alternative I considered was to dynamically allocate the > kvm_mmu_memory_cache structs. This would get rid of the anonymous > struct and the objects array, and also eliminate the rather gross > capacity hack in kvm_mmu_topup_memory_cache(). > > The downsides of this approach is more code and more failure paths if > the allocation fails. I tried changing all kvm_mmu_memory_cache structs to be dynamically allocated, but it created a lot of complexity to the setup/teardown code paths in x86, arm64, mips, and riscv (the arches that use the caches). I don't think this route is worth it, especially since these structs don't *need* to be dynamically allocated. When you said the anonymous struct feels brittle, what did you have in mind specifically? > > > > > > > > > /* Target CPU and feature flags */ > > > int target; > > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > > > index bc2aba953299..9c853c529b49 100644 > > > --- a/arch/arm64/kvm/mmu.c > > > +++ b/arch/arm64/kvm/mmu.c > > > @@ -765,7 +765,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > > { > > > phys_addr_t addr; > > > int ret = 0; > > > - struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, }; > > > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; > > > + struct kvm_mmu_memory_cache *cache = &page_cache.cache; > > > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > > > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | > > > KVM_PGTABLE_PROT_R | > > > @@ -774,18 +775,17 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > > if (is_protected_kvm_enabled()) > > > return -EPERM; > > > > > > + cache->gfp_zero = __GFP_ZERO; > > > > nit: consider this instead, which preserves the existing flow: > > Will do. > > > > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > > index 26d6c53be083..86a7ebd03a44 100644 > > --- a/arch/arm64/kvm/mmu.c > > +++ b/arch/arm64/kvm/mmu.c > > @@ -764,7 +764,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > { > > phys_addr_t addr; > > int ret = 0; > > - DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; > > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = { > > + .cache = { .gfp_zero = __GFP_ZERO}, > > + }; > > struct kvm_mmu_memory_cache *cache = &page_cache.cache; > > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | > > @@ -774,7 +776,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > if (is_protected_kvm_enabled()) > > return -EPERM; > > > > - cache->gfp_zero = __GFP_ZERO; > > size += offset_in_page(guest_ipa); > > guest_ipa &= PAGE_MASK; > > > > but whole "declare the outer structure and just use the inner one" > > hack is... huh... :-/ > > Yeah it's not great. Unfortunately (or maybe fortunately?) anonymous > structs cannot be defined in functions. 
So naming the outer struct is > necessary even though we only need to use the inner one. I see two alternatives to make this cleaner: 1. Dynamically allocate just this cache. The caches defined in vcpu_arch will continue to use DEFINE_KVM_MMU_MEMORY_CACHE(). This would get rid of the outer struct but require an extra memory allocation. 2. Move this cache to struct kvm_arch using DEFINE_KVM_MMU_MEMORY_CACHE(). Then we don't need to stack allocate it or dynamically allocate it. Do either of these approaches appeal to you more than the current one? > > > > > This hunk also conflicts with what currently sits in -next. Not a big > > deal, but just so you know. > > Ack. > > > > > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h > > > index dceac12c1ce5..9575fb8d333f 100644 > > > --- a/include/linux/kvm_types.h > > > +++ b/include/linux/kvm_types.h > > > @@ -78,14 +78,34 @@ struct gfn_to_pfn_cache { > > > * MMU flows is problematic, as is triggering reclaim, I/O, etc... while > > > * holding MMU locks. Note, these caches act more like prefetch buffers than > > > * classical caches, i.e. objects are not returned to the cache on being freed. > > > + * > > > + * The storage for the cache objects is laid out after the struct to allow > > > + * different declarations to choose different capacities. If the capacity field > > > + * is 0, the capacity is assumed to be KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE. > > > */ > > > struct kvm_mmu_memory_cache { > > > int nobjs; > > > + int capacity; > > > gfp_t gfp_zero; > > > struct kmem_cache *kmem_cache; > > > - void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; > > > + void *objects[0]; > > > > The VLA police is going to track you down ([0] vs []). > > Thanks! > > > > > > M. > > > > -- > > Without deviation from the norm, progress is not possible.
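For concreteness, alternative 2 would amount to something like this on the arm64 side (the field name is illustrative):

    /* In struct kvm_arch: give the cache VM-lifetime storage. */
    DEFINE_KVM_MMU_MEMORY_CACHE(mmu_ioremap_cache);

    /* kvm_phys_addr_ioremap() would then use it via the kvm pointer: */
    ret = kvm_mmu_topup_memory_cache(&kvm->arch.mmu_ioremap_cache,
                                     kvm_mmu_cache_min_pages(kvm));

The outer struct disappears from the function body because anonymous members are allowed at structure scope, so kvm->arch.mmu_ioremap_cache names the inner struct kvm_mmu_memory_cache directly, just as vcpu->arch.mmu_page_cache does.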
On Fri, Mar 4, 2022 at 1:59 PM David Matlack <dmatlack@google.com> wrote: > > On Thu, Feb 24, 2022 at 11:20 AM David Matlack <dmatlack@google.com> wrote: > > > > On Thu, Feb 24, 2022 at 3:29 AM Marc Zyngier <maz@kernel.org> wrote: > > > > > > On Thu, 03 Feb 2022 01:00:47 +0000, > > > David Matlack <dmatlack@google.com> wrote: > > > > > > [...] > > > > > > > > > /* Cache some mmu pages needed inside spinlock regions */ > > > > - struct kvm_mmu_memory_cache mmu_page_cache; > > > > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); > > > > > > I must say I'm really not a fan of the anonymous structure trick. I > > > can see why you are doing it that way, but it feels pretty brittle. > > > > Yeah I don't love it. It's really optimizing for minimizing the patch diff. > > > > The alternative I considered was to dynamically allocate the > > kvm_mmu_memory_cache structs. This would get rid of the anonymous > > struct and the objects array, and also eliminate the rather gross > > capacity hack in kvm_mmu_topup_memory_cache(). > > > > The downsides of this approach is more code and more failure paths if > > the allocation fails. > > I tried changing all kvm_mmu_memory_cache structs to be dynamically > allocated, but it created a lot of complexity to the setup/teardown > code paths in x86, arm64, mips, and riscv (the arches that use the > caches). I don't think this route is worth it, especially since these > structs don't *need* to be dynamically allocated. > > When you said the anonymous struct feels brittle, what did you have in > mind specifically? > > > > > > > > > > > > > > /* Target CPU and feature flags */ > > > > int target; > > > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > > > > index bc2aba953299..9c853c529b49 100644 > > > > --- a/arch/arm64/kvm/mmu.c > > > > +++ b/arch/arm64/kvm/mmu.c > > > > @@ -765,7 +765,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > > > { > > > > phys_addr_t addr; > > > > int ret = 0; > > > > - struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, }; > > > > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; > > > > + struct kvm_mmu_memory_cache *cache = &page_cache.cache; > > > > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > > > > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | > > > > KVM_PGTABLE_PROT_R | > > > > @@ -774,18 +775,17 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > > > if (is_protected_kvm_enabled()) > > > > return -EPERM; > > > > > > > > + cache->gfp_zero = __GFP_ZERO; > > > > > > nit: consider this instead, which preserves the existing flow: > > > > Will do. 
> > > > > > > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > > > index 26d6c53be083..86a7ebd03a44 100644 > > > --- a/arch/arm64/kvm/mmu.c > > > +++ b/arch/arm64/kvm/mmu.c > > > @@ -764,7 +764,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > > { > > > phys_addr_t addr; > > > int ret = 0; > > > - DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; > > > + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = { > > > + .cache = { .gfp_zero = __GFP_ZERO}, > > > + }; > > > struct kvm_mmu_memory_cache *cache = &page_cache.cache; > > > struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > > > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | > > > @@ -774,7 +776,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, > > > if (is_protected_kvm_enabled()) > > > return -EPERM; > > > > > > - cache->gfp_zero = __GFP_ZERO; > > > size += offset_in_page(guest_ipa); > > > guest_ipa &= PAGE_MASK; > > > > > > but whole "declare the outer structure and just use the inner one" > > > hack is... huh... :-/ > > > > Yeah it's not great. Unfortunately (or maybe fortunately?) anonymous > > structs cannot be defined in functions. So naming the outer struct is > > necessary even though we only need to use the inner one. > > I see two alternatives to make this cleaner: > > 1. Dynamically allocate just this cache. The caches defined in > vcpu_arch will continue to use DEFINE_KVM_MMU_MEMORY_CACHE(). This > would get rid of the outer struct but require an extra memory > allocation. > 2. Move this cache to struct kvm_arch using > DEFINE_KVM_MMU_MEMORY_CACHE(). Then we don't need to stack allocate it > or dynamically allocate it. > > Do either of these approaches appeal to you more than the current one? (There's obvious performance and memory overhead trade-offs with these different approaches, but I don't know enough about arm64 KVM to assess which option might be best.) > > > > > > > > > This hunk also conflicts with what currently sits in -next. Not a big > > > deal, but just so you know. > > > > Ack. > > > > > > > > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h > > > > index dceac12c1ce5..9575fb8d333f 100644 > > > > --- a/include/linux/kvm_types.h > > > > +++ b/include/linux/kvm_types.h > > > > @@ -78,14 +78,34 @@ struct gfn_to_pfn_cache { > > > > * MMU flows is problematic, as is triggering reclaim, I/O, etc... while > > > > * holding MMU locks. Note, these caches act more like prefetch buffers than > > > > * classical caches, i.e. objects are not returned to the cache on being freed. > > > > + * > > > > + * The storage for the cache objects is laid out after the struct to allow > > > > + * different declarations to choose different capacities. If the capacity field > > > > + * is 0, the capacity is assumed to be KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE. > > > > */ > > > > struct kvm_mmu_memory_cache { > > > > int nobjs; > > > > + int capacity; > > > > gfp_t gfp_zero; > > > > struct kmem_cache *kmem_cache; > > > > - void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; > > > > + void *objects[0]; > > > > > > The VLA police is going to track you down ([0] vs []). > > > > Thanks! > > > > > > > > > > M. > > > > > > -- > > > Without deviation from the norm, progress is not possible.
On Fri, 04 Mar 2022 21:59:12 +0000, David Matlack <dmatlack@google.com> wrote: > > On Thu, Feb 24, 2022 at 11:20 AM David Matlack <dmatlack@google.com> wrote: > > > > On Thu, Feb 24, 2022 at 3:29 AM Marc Zyngier <maz@kernel.org> wrote: > > > > > > On Thu, 03 Feb 2022 01:00:47 +0000, > > > David Matlack <dmatlack@google.com> wrote: > > > > > > [...] > > > > > > > > > /* Cache some mmu pages needed inside spinlock regions */ > > > > - struct kvm_mmu_memory_cache mmu_page_cache; > > > > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); > > > > > > I must say I'm really not a fan of the anonymous structure trick. I > > > can see why you are doing it that way, but it feels pretty brittle. > > > > Yeah I don't love it. It's really optimizing for minimizing the patch diff. > > > > The alternative I considered was to dynamically allocate the > > kvm_mmu_memory_cache structs. This would get rid of the anonymous > > struct and the objects array, and also eliminate the rather gross > > capacity hack in kvm_mmu_topup_memory_cache(). > > > > The downsides of this approach is more code and more failure paths if > > the allocation fails. > > I tried changing all kvm_mmu_memory_cache structs to be dynamically > allocated, but it created a lot of complexity to the setup/teardown > code paths in x86, arm64, mips, and riscv (the arches that use the > caches). I don't think this route is worth it, especially since these > structs don't *need* to be dynamically allocated. > > When you said the anonymous struct feels brittle, what did you have in > mind specifically? I can perfectly see someone using a kvm_mmu_memory_cache and searching for a bit why they end up with memory corruption. Yes, this would be a rookie mistake, but there are some expectations all over the kernel that DEFINE_* and the corresponding structure are the same object. [...] > I see two alternatives to make this cleaner: > > 1. Dynamically allocate just this cache. The caches defined in > vcpu_arch will continue to use DEFINE_KVM_MMU_MEMORY_CACHE(). This > would get rid of the outer struct but require an extra memory > allocation. > 2. Move this cache to struct kvm_arch using > DEFINE_KVM_MMU_MEMORY_CACHE(). Then we don't need to stack allocate it > or dynamically allocate it. > > Do either of these approaches appeal to you more than the current one? Certainly, #2 feels more solid. Dynamic allocations (and the resulting pointer chasing) are usually costly in terms of performance, so I'd avoid it if at all possible. That being said, if it turns out that #2 isn't practical, I won't get in the way of your current approach. Moving kvm_mmu_memory_cache to core code was definitely a good cleanup, and I'm not overly excited with the perspective of *more* arch-specific code. Thanks, M.
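Spelled out, the misuse Marc is describing would look like this (a hypothetical bug, not code from the series):

    /* BUG: bare struct, so there is no storage behind objects[]... */
    struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };

    /*
     * ...yet capacity == 0 makes the topup path assume
     * KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE slots, so these stores scribble
     * over whatever happens to follow the struct.
     */
    kvm_mmu_topup_memory_cache(&cache, 1);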
On Sat, Mar 5, 2022 at 8:55 AM Marc Zyngier <maz@kernel.org> wrote: > > On Fri, 04 Mar 2022 21:59:12 +0000, > David Matlack <dmatlack@google.com> wrote: > > > > On Thu, Feb 24, 2022 at 11:20 AM David Matlack <dmatlack@google.com> wrote: > > > > > > On Thu, Feb 24, 2022 at 3:29 AM Marc Zyngier <maz@kernel.org> wrote: > > > > > > > > On Thu, 03 Feb 2022 01:00:47 +0000, > > > > David Matlack <dmatlack@google.com> wrote: > > > > > > > > > [...] > > > > > > > > > > > > /* Cache some mmu pages needed inside spinlock regions */ > > > > > - struct kvm_mmu_memory_cache mmu_page_cache; > > > > > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); > > > > > > > > I must say I'm really not a fan of the anonymous structure trick. I > > > > can see why you are doing it that way, but it feels pretty brittle. > > > > > > Yeah I don't love it. It's really optimizing for minimizing the patch diff. > > > > > > The alternative I considered was to dynamically allocate the > > > kvm_mmu_memory_cache structs. This would get rid of the anonymous > > > struct and the objects array, and also eliminate the rather gross > > > capacity hack in kvm_mmu_topup_memory_cache(). > > > > > > The downsides of this approach is more code and more failure paths if > > > the allocation fails. > > > > I tried changing all kvm_mmu_memory_cache structs to be dynamically > > allocated, but it created a lot of complexity to the setup/teardown > > code paths in x86, arm64, mips, and riscv (the arches that use the > > caches). I don't think this route is worth it, especially since these > > structs don't *need* to be dynamically allocated. > > > > When you said the anonymous struct feels brittle, what did you have in > > mind specifically? > > I can perfectly see someone using a kvm_mmu_memory_cache and searching > for a bit why they end-up with memory corruption. Yes, this would be a > rookie mistake, but this are some expectations all over the kernel > that DEFINE_* and the corresponding structure are the same object. That is a good point. And that risk is very real given that kvm_mmu_topup_memory_cache() assumes the capacity is KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE if the capacity field is 0. One way to mitigate this would be to get rid of the capacity hack in kvm_mmu_topup_memory_cache() and require the capacity field be explicitly initialized. That will make it harder to trip over this and/or easier to debug because kvm_mmu_topup_memory_cache() can issue a WARN() if the capacity is 0. Once you see that warning and go to initialize the capacity field you'll realize why it needs to be set in the first place. The diff will just be slightly larger to set capacity for each cache. > > [...] > > > I see two alternatives to make this cleaner: > > > > 1. Dynamically allocate just this cache. The caches defined in > > vcpu_arch will continue to use DEFINE_KVM_MMU_MEMORY_CACHE(). This > > would get rid of the outer struct but require an extra memory > > allocation. > > 2. Move this cache to struct kvm_arch using > > DEFINE_KVM_MMU_MEMORY_CACHE(). Then we don't need to stack allocate it > > or dynamically allocate it. > > > > Do either of these approaches appeal to you more than the current one? > > Certainly, #2 feels more solid. Dynamic allocations (and the resulting > pointer chasing) are usually costly in terms of performance, so I'd > avoid it if at all possible. > > That being said, if it turns out that #2 isn't practical, I won't get > in the way of your current approach. 
Moving kvm_mmu_memory_cache to > core code was definitely a good cleanup, and I'm not overly excited > with the perspective of *more* arch-specific code. Ok I'll play with #2. Thanks for the feedback. > > Thanks, > > M. > > -- > Without deviation from the norm, progress is not possible.
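The explicit-capacity scheme proposed above would make the topup path look roughly like this (a sketch; the exact error code and WARN variant are open choices):

    int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
    {
        void *obj;

        /*
         * No silent fallback to the arch default: a zero capacity now
         * means the declaration forgot to initialize the field.
         */
        if (WARN_ON_ONCE(!mc->capacity))
            return -EINVAL;

        if (mc->nobjs >= min)
            return 0;

        while (mc->nobjs < mc->capacity) {
            obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
            if (!obj)
                return mc->nobjs >= min ? 0 : -ENOMEM;
            mc->objects[mc->nobjs++] = obj;
        }
        return 0;
    }

In exchange, every cache user has to set the capacity field explicitly during setup (the slightly larger diff mentioned above).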
On Mon, 07 Mar 2022 23:49:06 +0000, David Matlack <dmatlack@google.com> wrote: > > On Sat, Mar 5, 2022 at 8:55 AM Marc Zyngier <maz@kernel.org> wrote: > > > > On Fri, 04 Mar 2022 21:59:12 +0000, > > David Matlack <dmatlack@google.com> wrote: > > > > > > On Thu, Feb 24, 2022 at 11:20 AM David Matlack <dmatlack@google.com> wrote: > > > > > > > > On Thu, Feb 24, 2022 at 3:29 AM Marc Zyngier <maz@kernel.org> wrote: > > > > > > > > > > On Thu, 03 Feb 2022 01:00:47 +0000, > > > > > David Matlack <dmatlack@google.com> wrote: > > > > > > > > > > > > [...] > > > > > > > > > > > > > > > /* Cache some mmu pages needed inside spinlock regions */ > > > > > > - struct kvm_mmu_memory_cache mmu_page_cache; > > > > > > + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); > > > > > > > > > > I must say I'm really not a fan of the anonymous structure trick. I > > > > > can see why you are doing it that way, but it feels pretty brittle. > > > > > > > > Yeah I don't love it. It's really optimizing for minimizing the patch diff. > > > > > > > > The alternative I considered was to dynamically allocate the > > > > kvm_mmu_memory_cache structs. This would get rid of the anonymous > > > > struct and the objects array, and also eliminate the rather gross > > > > capacity hack in kvm_mmu_topup_memory_cache(). > > > > > > > > The downsides of this approach is more code and more failure paths if > > > > the allocation fails. > > > > > > I tried changing all kvm_mmu_memory_cache structs to be dynamically > > > allocated, but it created a lot of complexity to the setup/teardown > > > code paths in x86, arm64, mips, and riscv (the arches that use the > > > caches). I don't think this route is worth it, especially since these > > > structs don't *need* to be dynamically allocated. > > > > > > When you said the anonymous struct feels brittle, what did you have in > > > mind specifically? > > > > I can perfectly see someone using a kvm_mmu_memory_cache and searching > > for a bit why they end-up with memory corruption. Yes, this would be a > > rookie mistake, but this are some expectations all over the kernel > > that DEFINE_* and the corresponding structure are the same object. > > That is a good point. And that risk is very real given that > kvm_mmu_topup_memory_cache() assumes the capacity is > KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE if the capacity field is 0. Exactly. I like being surprised as much as the next one, but I'm not sure about this particular instance ;-). > One way to mitigate this would be to get rid of the capacity hack in > kvm_mmu_topup_memory_cache() and require the capacity field be > explicitly initialized. That will make it harder to trip over this > and/or easier to debug because kvm_mmu_topup_memory_cache() can issue > a WARN() if the capacity is 0. Once you see that warning and go to > initialize the capacity field you'll realize why it needs to be set in > the first place. The diff will just be slightly larger to set capacity > for each cache. That'd be fine. I don't mind the extra diff if this can be made more or less foolproof. Thanks, M.
On Mon, Mar 7, 2022 at 3:49 PM David Matlack <dmatlack@google.com> wrote: > > On Sat, Mar 5, 2022 at 8:55 AM Marc Zyngier <maz@kernel.org> wrote: > > > > On Fri, 04 Mar 2022 21:59:12 +0000, > > David Matlack <dmatlack@google.com> wrote: > > > I see two alternatives to make this cleaner: > > > > > > 1. Dynamically allocate just this cache. The caches defined in > > > vcpu_arch will continue to use DEFINE_KVM_MMU_MEMORY_CACHE(). This > > > would get rid of the outer struct but require an extra memory > > > allocation. > > > 2. Move this cache to struct kvm_arch using > > > DEFINE_KVM_MMU_MEMORY_CACHE(). Then we don't need to stack allocate it > > > or dynamically allocate it. > > > > > > Do either of these approaches appeal to you more than the current one? > > > > Certainly, #2 feels more solid. Dynamic allocations (and the resulting > > pointer chasing) are usually costly in terms of performance, so I'd > > avoid it if at all possible. > > > > That being said, if it turns out that #2 isn't practical, I won't get > > in the way of your current approach. Moving kvm_mmu_memory_cache to > > core code was definitely a good cleanup, and I'm not overly excited > > with the perspective of *more* arch-specific code. > > Ok I'll play with #2. Thanks for the feedback. #2 is very clean to implement but it ends up being a bit silly. It increases the size of struct kvm_arch by 336 bytes for all VMs but only ever gets used during kvm_vgic_map_resources(), which is only called the first time a vCPU is run (according to the comment in kvm_arch_vcpu_run_pid_change()). I think stack allocation makes the most sense for this object, I don't think it's worth dancing around that solely to avoid the inner struct grottiness.
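(For scale: assuming arm64's default KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE of 40, the pointer array alone accounts for 40 * sizeof(void *) = 320 of those 336 bytes; the rest is the small cache header.)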
On Wed, 09 Mar 2022 21:49:01 +0000, David Matlack <dmatlack@google.com> wrote: > > On Mon, Mar 7, 2022 at 3:49 PM David Matlack <dmatlack@google.com> wrote: > > > > On Sat, Mar 5, 2022 at 8:55 AM Marc Zyngier <maz@kernel.org> wrote: > > > > > > On Fri, 04 Mar 2022 21:59:12 +0000, > > > David Matlack <dmatlack@google.com> wrote: > > > > I see two alternatives to make this cleaner: > > > > > > > > 1. Dynamically allocate just this cache. The caches defined in > > > > vcpu_arch will continue to use DEFINE_KVM_MMU_MEMORY_CACHE(). This > > > > would get rid of the outer struct but require an extra memory > > > > allocation. > > > > 2. Move this cache to struct kvm_arch using > > > > DEFINE_KVM_MMU_MEMORY_CACHE(). Then we don't need to stack allocate it > > > > or dynamically allocate it. > > > > > > > > Do either of these approaches appeal to you more than the current one? > > > > > > Certainly, #2 feels more solid. Dynamic allocations (and the resulting > > > pointer chasing) are usually costly in terms of performance, so I'd > > > avoid it if at all possible. > > > > > > That being said, if it turns out that #2 isn't practical, I won't get > > > in the way of your current approach. Moving kvm_mmu_memory_cache to > > > core code was definitely a good cleanup, and I'm not overly excited > > > with the perspective of *more* arch-specific code. > > > > Ok I'll play with #2. Thanks for the feedback. > > #2 is very clean to implement but it ends up being a bit silly. It > increases the size of struct kvm_arch by 336 bytes for all VMs but > only ever gets used during kvm_vgic_map_resources(), which is only > called the first time a vCPU is run (according to the comment in > kvm_arch_vcpu_run_pid_change()). I think stack allocation makes the > most sense for this object, I don't think it's worth dancing around > that solely to avoid the inner struct grottiness. Fair enough, and thanks for having had a look. I'll look at the next version once you post it. Thanks, M.
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3b44ea17af88..a450b91cc2d9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -357,7 +357,7 @@ struct kvm_vcpu_arch { bool pause; /* Cache some mmu pages needed inside spinlock regions */ - struct kvm_mmu_memory_cache mmu_page_cache; + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); /* Target CPU and feature flags */ int target; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index bc2aba953299..9c853c529b49 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -765,7 +765,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, { phys_addr_t addr; int ret = 0; - struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, }; + DEFINE_KVM_MMU_MEMORY_CACHE(cache) page_cache = {}; + struct kvm_mmu_memory_cache *cache = &page_cache.cache; struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_R | @@ -774,18 +775,17 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (is_protected_kvm_enabled()) return -EPERM; + cache->gfp_zero = __GFP_ZERO; size += offset_in_page(guest_ipa); guest_ipa &= PAGE_MASK; for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) { - ret = kvm_mmu_topup_memory_cache(&cache, - kvm_mmu_cache_min_pages(kvm)); + ret = kvm_mmu_topup_memory_cache(cache, kvm_mmu_cache_min_pages(kvm)); if (ret) break; spin_lock(&kvm->mmu_lock); - ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, - &cache); + ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, cache); spin_unlock(&kvm->mmu_lock); if (ret) break; @@ -793,7 +793,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, pa += PAGE_SIZE; } - kvm_mmu_free_memory_cache(&cache); + kvm_mmu_free_memory_cache(cache); return ret; } diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 72b90d45a46e..82bbcbc3ead6 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -346,7 +346,7 @@ struct kvm_vcpu_arch { unsigned long pending_exceptions_clr; /* Cache some mmu pages needed inside spinlock regions */ - struct kvm_mmu_memory_cache mmu_page_cache; + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_cache); /* vcpu's vzguestid is different on each host cpu in an smp system */ u32 vzguestid[NR_CPUS]; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f00004c13ccf..d0b12bfe5818 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -684,10 +684,10 @@ struct kvm_vcpu_arch { */ struct kvm_mmu *walk_mmu; - struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; - struct kvm_mmu_memory_cache mmu_shadow_page_cache; - struct kvm_mmu_memory_cache mmu_shadowed_translation_cache; - struct kvm_mmu_memory_cache mmu_page_header_cache; + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_pte_list_desc_cache); + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_shadow_page_cache); + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_shadowed_translation_cache); + DEFINE_KVM_MMU_MEMORY_CACHE(mmu_page_header_cache); /* * QEMU userspace and the guest each have their own FPU state. diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index dceac12c1ce5..9575fb8d333f 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -78,14 +78,34 @@ struct gfn_to_pfn_cache { * MMU flows is problematic, as is triggering reclaim, I/O, etc... while * holding MMU locks. 
Note, these caches act more like prefetch buffers than * classical caches, i.e. objects are not returned to the cache on being freed. + * + * The storage for the cache objects is laid out after the struct to allow + * different declarations to choose different capacities. If the capacity field + * is 0, the capacity is assumed to be KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE. */ struct kvm_mmu_memory_cache { int nobjs; + int capacity; gfp_t gfp_zero; struct kmem_cache *kmem_cache; - void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE]; + void *objects[0]; }; -#endif + +/* + * Note, if defining a memory cache with a non-default capacity, you must + * initialize the capacity field at runtime. + */ +#define __DEFINE_KVM_MMU_MEMORY_CACHE(_name, _capacity) \ + struct { \ + struct kvm_mmu_memory_cache _name; \ + void *_name##_objects[_capacity]; \ + } + +/* Define a memory cache with the default capacity. */ +#define DEFINE_KVM_MMU_MEMORY_CACHE(_name) \ + __DEFINE_KVM_MMU_MEMORY_CACHE(_name, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE) + +#endif /* KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE */ #define HALT_POLL_HIST_COUNT 32 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 034c567a680c..afa4bdb6481e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -373,11 +373,17 @@ static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc, int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min) { + int capacity; void *obj; + if (mc->capacity) + capacity = mc->capacity; + else + capacity = KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE; + if (mc->nobjs >= min) return 0; - while (mc->nobjs < ARRAY_SIZE(mc->objects)) { + while (mc->nobjs < capacity) { obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT); if (!obj) return mc->nobjs >= min ? 0 : -ENOMEM;
Allow the capacity of the kvm_mmu_memory_cache struct to be chosen at declaration time rather than being fixed for all declarations. This will be used in a follow-up commit to declare a cache in x86 with a capacity of 512+ objects without having to increase the capacity of all caches in KVM. No functional change intended. Signed-off-by: David Matlack <dmatlack@google.com> --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/mmu.c | 12 ++++++------ arch/mips/include/asm/kvm_host.h | 2 +- arch/x86/include/asm/kvm_host.h | 8 ++++---- include/linux/kvm_types.h | 24 ++++++++++++++++++++++-- virt/kvm/kvm_main.c | 8 +++++++- 6 files changed, 41 insertions(+), 15 deletions(-)
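To illustrate the declaration-time capacity the commit enables, the follow-up x86 cache could be declared along these lines (the name and the 513 figure are illustrative stand-ins, not the final code):

    /* A cache big enough to split one 512-entry page table in one go. */
    __DEFINE_KVM_MMU_MEMORY_CACHE(split_page_cache, 513);

    /*
     * Per the macro's comment, a non-default capacity must also be set
     * at runtime before the first topup:
     */
    kvm->arch.split_page_cache.capacity = 513;
    kvm->arch.split_page_cache.gfp_zero = __GFP_ZERO;

Existing users keep DEFINE_KVM_MMU_MEMORY_CACHE() and the default capacity, so only the new, larger cache pays for the extra object storage.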