Message ID | 20240227232100.478238-3-pbonzini@redhat.com (mailing list archive)
---|---
State | New, archived
Series | TDX/SNP part 1 of n, for 6.9
On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> From: Sean Christopherson <seanjc@google.com>
>
> Add support to MMU caches for initializing a page with a custom 64-bit
> value, e.g. to pre-fill an entire page table with non-zero PTE values.
> The functionality will be used by x86 to support Intel's TDX, which needs
> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> faults from getting reflected into the guest (Intel's EPT Violation #VE
> architecture made the less than brilliant decision of having the per-PTE
> behavior be opt-out instead of opt-in).
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>

> ---
>  include/linux/kvm_types.h |  1 +
>  virt/kvm/kvm_main.c       | 16 ++++++++++++++--
>  2 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index d93f6522b2c3..827ecc0b7e10 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
>  struct kvm_mmu_memory_cache {
>      gfp_t gfp_zero;
>      gfp_t gfp_custom;
> +    u64 init_value;
>      struct kmem_cache *kmem_cache;
>      int capacity;
>      int nobjs;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9c99c9373a3e..c9828feb7a1c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
>  static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
>                                                 gfp_t gfp_flags)
>  {
> +    void *page;
> +
>      gfp_flags |= mc->gfp_zero;
>
>      if (mc->kmem_cache)
>          return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> -    else
> -        return (void *)__get_free_page(gfp_flags);
> +
> +    page = (void *)__get_free_page(gfp_flags);
> +    if (page && mc->init_value)
> +        memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> +    return page;
>  }
>
>  int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
>      if (WARN_ON_ONCE(!capacity))
>          return -EIO;
>
> +    /*
> +     * Custom init values can be used only for page allocations,
> +     * and obviously conflict with __GFP_ZERO.
> +     */
> +    if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
> +        return -EIO;
> +
>      mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
>      if (!mc->objects)
>          return -ENOMEM;
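The mechanics of the pre-fill are easy to demonstrate outside the kernel. Below is a minimal user-space sketch of the same pattern, not kernel code: memset64() is re-implemented locally (the real helper lives in the kernel's string routines), PAGE_SIZE is assumed to be 4096, and SHADOW_NONPRESENT_VALUE is taken to be BIT_ULL(63) per the compiler output quoted later in the thread.

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define PAGE_SIZE 4096ULL                      /* assumed 4 KiB pages */
  #define SHADOW_NONPRESENT_VALUE (1ULL << 63)   /* "suppress #VE" bit */

  /* Local stand-in for the kernel's memset64() helper. */
  static void memset64(uint64_t *s, uint64_t v, size_t count)
  {
          size_t i;

          for (i = 0; i < count; i++)
                  s[i] = v;
  }

  int main(void)
  {
          /* Mirror mmu_memory_cache_alloc_obj(): grab a page, then
           * pre-fill every 64-bit slot with the custom init value. */
          uint64_t *page = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
          size_t i, n = PAGE_SIZE / sizeof(uint64_t);

          if (!page)
                  return 1;

          memset64(page, SHADOW_NONPRESENT_VALUE, n);

          /* Every "PTE" in the page now has bit 63 set. */
          for (i = 0; i < n; i++)
                  assert(page[i] == SHADOW_NONPRESENT_VALUE);

          printf("pre-filled %zu entries with bit 63 set\n", n);
          free(page);
          return 0;
  }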
On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> From: Sean Christopherson <seanjc@google.com>
>
> Add support to MMU caches for initializing a page with a custom 64-bit
> value, e.g. to pre-fill an entire page table with non-zero PTE values.
> The functionality will be used by x86 to support Intel's TDX, which needs
> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> faults from getting reflected into the guest (Intel's EPT Violation #VE
> architecture made the less than brilliant decision of having the per-PTE
> behavior be opt-out instead of opt-in).
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/linux/kvm_types.h |  1 +
>  virt/kvm/kvm_main.c       | 16 ++++++++++++++--
>  2 files changed, 15 insertions(+), 2 deletions(-)

Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>

>
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index d93f6522b2c3..827ecc0b7e10 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
>  struct kvm_mmu_memory_cache {
>      gfp_t gfp_zero;
>      gfp_t gfp_custom;
> +    u64 init_value;
>      struct kmem_cache *kmem_cache;
>      int capacity;
>      int nobjs;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9c99c9373a3e..c9828feb7a1c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
>  static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
>                                                 gfp_t gfp_flags)
>  {
> +    void *page;
> +
>      gfp_flags |= mc->gfp_zero;
>
>      if (mc->kmem_cache)
>          return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> -    else
> -        return (void *)__get_free_page(gfp_flags);
> +
> +    page = (void *)__get_free_page(gfp_flags);
> +    if (page && mc->init_value)
> +        memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> +    return page;
>  }
>
>  int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
>      if (WARN_ON_ONCE(!capacity))
>          return -EIO;
>
> +    /*
> +     * Custom init values can be used only for page allocations,
> +     * and obviously conflict with __GFP_ZERO.
> +     */
> +    if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
> +        return -EIO;
> +
>      mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
>      if (!mc->objects)
>          return -ENOMEM;
On 3/5/2024 2:55 PM, Binbin Wu wrote:
>
>
> On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
>> From: Sean Christopherson <seanjc@google.com>
>>
>> Add support to MMU caches for initializing a page with a custom 64-bit
>> value, e.g. to pre-fill an entire page table with non-zero PTE values.
>> The functionality will be used by x86 to support Intel's TDX, which needs
>> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
>> faults from getting reflected into the guest (Intel's EPT Violation #VE
>> architecture made the less than brilliant decision of having the per-PTE
>> behavior be opt-out instead of opt-in).
>>
>> Signed-off-by: Sean Christopherson <seanjc@google.com>
>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>> Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  include/linux/kvm_types.h |  1 +
>>  virt/kvm/kvm_main.c       | 16 ++++++++++++++--
>>  2 files changed, 15 insertions(+), 2 deletions(-)
>
> Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
>
>>
>> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
>> index d93f6522b2c3..827ecc0b7e10 100644
>> --- a/include/linux/kvm_types.h
>> +++ b/include/linux/kvm_types.h
>> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
>>  struct kvm_mmu_memory_cache {
>>      gfp_t gfp_zero;
>>      gfp_t gfp_custom;
>> +    u64 init_value;
>>      struct kmem_cache *kmem_cache;
>>      int capacity;
>>      int nobjs;
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 9c99c9373a3e..c9828feb7a1c 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
>>  static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
>>                                                 gfp_t gfp_flags)
>>  {
>> +    void *page;
>> +
>>      gfp_flags |= mc->gfp_zero;
>>      if (mc->kmem_cache)
>>          return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
>> -    else
>> -        return (void *)__get_free_page(gfp_flags);
>> +
>> +    page = (void *)__get_free_page(gfp_flags);
>> +    if (page && mc->init_value)
>> +        memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));

Do we need a static_assert() to make sure mc->init_value is 64bit?

>> +    return page;
>>  }
>>  int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
>> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
>>      if (WARN_ON_ONCE(!capacity))
>>          return -EIO;
>> +    /*
>> +     * Custom init values can be used only for page allocations,
>> +     * and obviously conflict with __GFP_ZERO.
>> +     */
>> +    if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
>> +        return -EIO;
>> +
>>      mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
>>      if (!mc->objects)
>>          return -ENOMEM;
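For reference, the check being asked about would be a one-liner next to the struct definition. The form below is only a sketch of the suggestion, not code from the patch; sizeof_field() and the two-argument static_assert() are existing kernel helpers.

  /* Hypothetical, not in the patch: pin init_value at 64 bits so the
   * memset64() stride can never silently diverge from the field width. */
  static_assert(sizeof_field(struct kvm_mmu_memory_cache, init_value) == sizeof(u64),
                "init_value must stay 64-bit to match memset64()");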
On Tue, Mar 26, 2024 at 11:56:35PM +0800, Binbin Wu <binbin.wu@linux.intel.com> wrote:
> On 3/5/2024 2:55 PM, Binbin Wu wrote:
> >
> >
> > On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> > > From: Sean Christopherson <seanjc@google.com>
> > >
> > > Add support to MMU caches for initializing a page with a custom 64-bit
> > > value, e.g. to pre-fill an entire page table with non-zero PTE values.
> > > The functionality will be used by x86 to support Intel's TDX, which needs
> > > to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> > > faults from getting reflected into the guest (Intel's EPT Violation #VE
> > > architecture made the less than brilliant decision of having the per-PTE
> > > behavior be opt-out instead of opt-in).
> > >
> > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > > Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > ---
> > >  include/linux/kvm_types.h |  1 +
> > >  virt/kvm/kvm_main.c       | 16 ++++++++++++++--
> > >  2 files changed, 15 insertions(+), 2 deletions(-)
> >
> > Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
> >
> > >
> > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > > index d93f6522b2c3..827ecc0b7e10 100644
> > > --- a/include/linux/kvm_types.h
> > > +++ b/include/linux/kvm_types.h
> > > @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> > >  struct kvm_mmu_memory_cache {
> > >      gfp_t gfp_zero;
> > >      gfp_t gfp_custom;
> > > +    u64 init_value;
> > >      struct kmem_cache *kmem_cache;
> > >      int capacity;
> > >      int nobjs;
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > >  static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> > >                                                 gfp_t gfp_flags)
> > >  {
> > > +    void *page;
> > > +
> > >      gfp_flags |= mc->gfp_zero;
> > >      if (mc->kmem_cache)
> > >          return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > -    else
> > > -        return (void *)__get_free_page(gfp_flags);
> > > +
> > > +    page = (void *)__get_free_page(gfp_flags);
> > > +    if (page && mc->init_value)
> > > +        memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
>
> Do we need a static_assert() to make sure mc->init_value is 64bit?

That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
uniformly.
On Mon, May 13, 2024 at 01:38:39PM -0700, Isaku Yamahata <isaku.yamahata@intel.com> wrote:
> > > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > > >  static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> > > >                                                 gfp_t gfp_flags)
> > > >  {
> > > > +    void *page;
> > > > +
> > > >      gfp_flags |= mc->gfp_zero;
> > > >      if (mc->kmem_cache)
> > > >          return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > > -    else
> > > > -        return (void *)__get_free_page(gfp_flags);
> > > > +
> > > > +    page = (void *)__get_free_page(gfp_flags);
> > > > +    if (page && mc->init_value)
> > > > +        memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> >
> > Do we need a static_assert() to make sure mc->init_value is 64bit?
>
> That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
> uniformly.

Oops, I picked the wrong mail to reply.  Sorry for noise.
On Mon, May 13, 2024, Isaku Yamahata wrote:
> On Tue, Mar 26, 2024 at 11:56:35PM +0800, Binbin Wu <binbin.wu@linux.intel.com> wrote:
> > > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > > > index d93f6522b2c3..827ecc0b7e10 100644
> > > > --- a/include/linux/kvm_types.h
> > > > +++ b/include/linux/kvm_types.h
> > > > @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> > > >  struct kvm_mmu_memory_cache {
> > > >      gfp_t gfp_zero;
> > > >      gfp_t gfp_custom;
> > > > +    u64 init_value;
> > > >      struct kmem_cache *kmem_cache;
> > > >      int capacity;
> > > >      int nobjs;
> > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > > >  static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> > > >                                                 gfp_t gfp_flags)
> > > >  {
> > > > +    void *page;
> > > > +
> > > >      gfp_flags |= mc->gfp_zero;
> > > >      if (mc->kmem_cache)
> > > >          return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > > -    else
> > > > -        return (void *)__get_free_page(gfp_flags);
> > > > +
> > > > +    page = (void *)__get_free_page(gfp_flags);
> > > > +    if (page && mc->init_value)
> > > > +        memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> >
> > Do we need a static_assert() to make sure mc->init_value is 64bit?
>
> That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
> uniformly.

I'm pretty sure Binbin is talking about passing init_value to memset64(), not
about whether or not that suffices for EPT.  So I wouldn't say it's overkill.

However, I don't think a static assert is warranted.  Functionally, tracking
init_value as a u32 or even a u8 would be a-ok as it's a copy-by-value
parameter that won't be sign-extended or truncated.  I.e. the real requirement
comes from TDX wanting to set a 64-bit value.  And trying to set bit 63 in a
32-bit field _will_ make the compiler unhappy:

  arch/x86/kvm/mmu/mmu.c: In function ‘kvm_mmu_create’:
  include/vdso/bits.h:8:33: error: conversion from ‘long long unsigned int’ to
  ‘u32’ {aka ‘unsigned int’} changes value from ‘9223372036854775808’ to ‘0’
  [-Werror=overflow]
      8 | #define BIT_ULL(nr)             (ULL(1) << (nr))
        |                                 ^
  arch/x86/kvm/mmu/spte.h:162:33: note: in expansion of macro ‘BIT_ULL’
    162 | #define SHADOW_NONPRESENT_VALUE BIT_ULL(63)
        |                                 ^~~~~~~
  arch/x86/kvm/mmu/mmu.c:6225:17: note: in expansion of macro ‘SHADOW_NONPRESENT_VALUE’
   6225 |                 SHADOW_NONPRESENT_VALUE;
        |                 ^~~~~~~~~~~~~~~~~~~~~~~

I suppose one could argue that changing init_value to a u128 could result in
undetected truncation, but IMO that firmly crosses into ridiculous territory.
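The diagnostic Sean quotes is reproducible outside the kernel. Here is a stand-alone reduction of the same truncation, assuming GCC or Clang with -Werror=overflow; the variable names are illustrative only:

  #include <stdint.h>

  #define BIT_ULL(nr) (1ULL << (nr))

  /* If init_value were narrowed to 32 bits, assigning BIT_ULL(63) is
   * caught at compile time: the constant's value changes from 2^63 to 0. */
  uint32_t narrow_init_value = BIT_ULL(63);  /* error with -Werror=overflow */
  uint64_t wide_init_value   = BIT_ULL(63);  /* fine: bit 63 is preserved   */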
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index d93f6522b2c3..827ecc0b7e10 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
 struct kvm_mmu_memory_cache {
     gfp_t gfp_zero;
     gfp_t gfp_custom;
+    u64 init_value;
     struct kmem_cache *kmem_cache;
     int capacity;
     int nobjs;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9c99c9373a3e..c9828feb7a1c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
 static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
                                                gfp_t gfp_flags)
 {
+    void *page;
+
     gfp_flags |= mc->gfp_zero;

     if (mc->kmem_cache)
         return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
-    else
-        return (void *)__get_free_page(gfp_flags);
+
+    page = (void *)__get_free_page(gfp_flags);
+    if (page && mc->init_value)
+        memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
+    return page;
 }

 int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
@@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
     if (WARN_ON_ONCE(!capacity))
         return -EIO;

+    /*
+     * Custom init values can be used only for page allocations,
+     * and obviously conflict with __GFP_ZERO.
+     */
+    if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
+        return -EIO;
+
     mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
     if (!mc->objects)
         return -ENOMEM;
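To tie the two hunks together, a hedged sketch of how an architecture is expected to opt in, consistent with the kvm_mmu_create() reference in the compiler output above. Only the init_value and gfp_zero fields come from this patch; the function body, SHADOW_NONPRESENT_VALUE, and PT64_ROOT_MAX_LEVEL as the topup amount are illustrative.

  /* Sketch only: configure a vCPU's shadow-page cache for pre-filled pages. */
  static int mmu_cache_setup_sketch(struct kvm_vcpu *vcpu)
  {
      struct kvm_mmu_memory_cache *mc = &vcpu->arch.mmu_shadow_page_cache;

      /*
       * init_value replaces __GFP_ZERO: setting both, or combining
       * init_value with a kmem_cache, trips the new WARN_ON_ONCE()
       * in __kvm_mmu_topup_memory_cache() and fails with -EIO.
       */
      mc->gfp_zero = 0;
      mc->init_value = SHADOW_NONPRESENT_VALUE;    /* BIT_ULL(63) */

      return kvm_mmu_topup_memory_cache(mc, PT64_ROOT_MAX_LEVEL);
  }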