diff mbox series

[02/21] KVM: Allow page-sized MMU caches to be initialized with custom 64-bit values

Message ID 20240227232100.478238-3-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show
Series TDX/SNP part 1 of n, for 6.9 | expand

Commit Message

Paolo Bonzini Feb. 27, 2024, 11:20 p.m. UTC
From: Sean Christopherson <seanjc@google.com>

Add support to MMU caches for initializing a page with a custom 64-bit
value, e.g. to pre-fill an entire page table with non-zero PTE values.
The functionality will be used by x86 to support Intel's TDX, which needs
to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
faults from getting reflected into the guest (Intel's EPT Violation #VE
architecture made the less than brilliant decision of having the per-PTE
behavior be opt-out instead of opt-in).

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_types.h |  1 +
 virt/kvm/kvm_main.c       | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

Comments

Xiaoyao Li Feb. 29, 2024, 1:46 p.m. UTC | #1
On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> From: Sean Christopherson <seanjc@google.com>
> 
> Add support to MMU caches for initializing a page with a custom 64-bit
> value, e.g. to pre-fill an entire page table with non-zero PTE values.
> The functionality will be used by x86 to support Intel's TDX, which needs
> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> faults from getting reflected into the guest (Intel's EPT Violation #VE
> architecture made the less than brilliant decision of having the per-PTE
> behavior be opt-out instead of opt-in).
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>

> ---
>   include/linux/kvm_types.h |  1 +
>   virt/kvm/kvm_main.c       | 16 ++++++++++++++--
>   2 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index d93f6522b2c3..827ecc0b7e10 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
>   struct kvm_mmu_memory_cache {
>   	gfp_t gfp_zero;
>   	gfp_t gfp_custom;
> +	u64 init_value;
>   	struct kmem_cache *kmem_cache;
>   	int capacity;
>   	int nobjs;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9c99c9373a3e..c9828feb7a1c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
>   static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
>   					       gfp_t gfp_flags)
>   {
> +	void *page;
> +
>   	gfp_flags |= mc->gfp_zero;
>   
>   	if (mc->kmem_cache)
>   		return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> -	else
> -		return (void *)__get_free_page(gfp_flags);
> +
> +	page = (void *)__get_free_page(gfp_flags);
> +	if (page && mc->init_value)
> +		memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> +	return page;
>   }
>   
>   int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
>   		if (WARN_ON_ONCE(!capacity))
>   			return -EIO;
>   
> +		/*
> +		 * Custom init values can be used only for page allocations,
> +		 * and obviously conflict with __GFP_ZERO.
> +		 */
> +		if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
> +			return -EIO;
> +
>   		mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
>   		if (!mc->objects)
>   			return -ENOMEM;
Binbin Wu March 5, 2024, 6:55 a.m. UTC | #2
On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> From: Sean Christopherson <seanjc@google.com>
>
> Add support to MMU caches for initializing a page with a custom 64-bit
> value, e.g. to pre-fill an entire page table with non-zero PTE values.
> The functionality will be used by x86 to support Intel's TDX, which needs
> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> faults from getting reflected into the guest (Intel's EPT Violation #VE
> architecture made the less than brilliant decision of having the per-PTE
> behavior be opt-out instead of opt-in).
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   include/linux/kvm_types.h |  1 +
>   virt/kvm/kvm_main.c       | 16 ++++++++++++++--
>   2 files changed, 15 insertions(+), 2 deletions(-)

Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>

>
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index d93f6522b2c3..827ecc0b7e10 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
>   struct kvm_mmu_memory_cache {
>   	gfp_t gfp_zero;
>   	gfp_t gfp_custom;
> +	u64 init_value;
>   	struct kmem_cache *kmem_cache;
>   	int capacity;
>   	int nobjs;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9c99c9373a3e..c9828feb7a1c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
>   static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
>   					       gfp_t gfp_flags)
>   {
> +	void *page;
> +
>   	gfp_flags |= mc->gfp_zero;
>   
>   	if (mc->kmem_cache)
>   		return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> -	else
> -		return (void *)__get_free_page(gfp_flags);
> +
> +	page = (void *)__get_free_page(gfp_flags);
> +	if (page && mc->init_value)
> +		memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
> +	return page;
>   }
>   
>   int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
>   		if (WARN_ON_ONCE(!capacity))
>   			return -EIO;
>   
> +		/*
> +		 * Custom init values can be used only for page allocations,
> +		 * and obviously conflict with __GFP_ZERO.
> +		 */
> +		if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
> +			return -EIO;
> +
>   		mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
>   		if (!mc->objects)
>   			return -ENOMEM;
Binbin Wu March 26, 2024, 3:56 p.m. UTC | #3
On 3/5/2024 2:55 PM, Binbin Wu wrote:
>
>
> On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
>> From: Sean Christopherson <seanjc@google.com>
>>
>> Add support to MMU caches for initializing a page with a custom 64-bit
>> value, e.g. to pre-fill an entire page table with non-zero PTE values.
>> The functionality will be used by x86 to support Intel's TDX, which 
>> needs
>> to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
>> faults from getting reflected into the guest (Intel's EPT Violation #VE
>> architecture made the less than brilliant decision of having the per-PTE
>> behavior be opt-out instead of opt-in).
>>
>> Signed-off-by: Sean Christopherson <seanjc@google.com>
>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>> Message-Id: 
>> <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   include/linux/kvm_types.h |  1 +
>>   virt/kvm/kvm_main.c       | 16 ++++++++++++++--
>>   2 files changed, 15 insertions(+), 2 deletions(-)
>
> Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
>
>>
>> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
>> index d93f6522b2c3..827ecc0b7e10 100644
>> --- a/include/linux/kvm_types.h
>> +++ b/include/linux/kvm_types.h
>> @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
>>   struct kvm_mmu_memory_cache {
>>       gfp_t gfp_zero;
>>       gfp_t gfp_custom;
>> +    u64 init_value;
>>       struct kmem_cache *kmem_cache;
>>       int capacity;
>>       int nobjs;
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 9c99c9373a3e..c9828feb7a1c 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
>>   static inline void *mmu_memory_cache_alloc_obj(struct 
>> kvm_mmu_memory_cache *mc,
>>                              gfp_t gfp_flags)
>>   {
>> +    void *page;
>> +
>>       gfp_flags |= mc->gfp_zero;
>>         if (mc->kmem_cache)
>>           return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
>> -    else
>> -        return (void *)__get_free_page(gfp_flags);
>> +
>> +    page = (void *)__get_free_page(gfp_flags);
>> +    if (page && mc->init_value)
>> +        memset64(page, mc->init_value, PAGE_SIZE / 
>> sizeof(mc->init_value));

Do we need a static_assert() to make sure mc->init_value is 64bit?

>> +    return page;
>>   }
>>     int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, 
>> int capacity, int min)
>> @@ -421,6 +426,13 @@ int __kvm_mmu_topup_memory_cache(struct 
>> kvm_mmu_memory_cache *mc, int capacity,
>>           if (WARN_ON_ONCE(!capacity))
>>               return -EIO;
>>   +        /*
>> +         * Custom init values can be used only for page allocations,
>> +         * and obviously conflict with __GFP_ZERO.
>> +         */
>> +        if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || 
>> mc->gfp_zero)))
>> +            return -EIO;
>> +
>>           mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
>>           if (!mc->objects)
>>               return -ENOMEM;
>
>
Isaku Yamahata May 13, 2024, 8:38 p.m. UTC | #4
On Tue, Mar 26, 2024 at 11:56:35PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> On 3/5/2024 2:55 PM, Binbin Wu wrote:
> > 
> > 
> > On 2/28/2024 7:20 AM, Paolo Bonzini wrote:
> > > From: Sean Christopherson <seanjc@google.com>
> > > 
> > > Add support to MMU caches for initializing a page with a custom 64-bit
> > > value, e.g. to pre-fill an entire page table with non-zero PTE values.
> > > The functionality will be used by x86 to support Intel's TDX, which
> > > needs
> > > to set bit 63 in all non-present PTEs in order to prevent !PRESENT page
> > > faults from getting reflected into the guest (Intel's EPT Violation #VE
> > > architecture made the less than brilliant decision of having the per-PTE
> > > behavior be opt-out instead of opt-in).
> > > 
> > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > > Message-Id: <5919f685f109a1b0ebc6bd8fc4536ee94bcc172d.1705965635.git.isaku.yamahata@intel.com>
> > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > > ---
> > >   include/linux/kvm_types.h |  1 +
> > >   virt/kvm/kvm_main.c       | 16 ++++++++++++++--
> > >   2 files changed, 15 insertions(+), 2 deletions(-)
> > 
> > Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
> > 
> > > 
> > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > > index d93f6522b2c3..827ecc0b7e10 100644
> > > --- a/include/linux/kvm_types.h
> > > +++ b/include/linux/kvm_types.h
> > > @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> > >   struct kvm_mmu_memory_cache {
> > >       gfp_t gfp_zero;
> > >       gfp_t gfp_custom;
> > > +    u64 init_value;
> > >       struct kmem_cache *kmem_cache;
> > >       int capacity;
> > >       int nobjs;
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > >   static inline void *mmu_memory_cache_alloc_obj(struct
> > > kvm_mmu_memory_cache *mc,
> > >                              gfp_t gfp_flags)
> > >   {
> > > +    void *page;
> > > +
> > >       gfp_flags |= mc->gfp_zero;
> > >         if (mc->kmem_cache)
> > >           return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > -    else
> > > -        return (void *)__get_free_page(gfp_flags);
> > > +
> > > +    page = (void *)__get_free_page(gfp_flags);
> > > +    if (page && mc->init_value)
> > > +        memset64(page, mc->init_value, PAGE_SIZE /
> > > sizeof(mc->init_value));
> 
> Do we need a static_assert() to make sure mc->init_value is 64bit?

That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
uniformly.
Isaku Yamahata May 13, 2024, 8:51 p.m. UTC | #5
On Mon, May 13, 2024 at 01:38:39PM -0700,
Isaku Yamahata <isaku.yamahata@intel.com> wrote:

> > > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > > >   static inline void *mmu_memory_cache_alloc_obj(struct
> > > > kvm_mmu_memory_cache *mc,
> > > >                              gfp_t gfp_flags)
> > > >   {
> > > > +    void *page;
> > > > +
> > > >       gfp_flags |= mc->gfp_zero;
> > > >         if (mc->kmem_cache)
> > > >           return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > > -    else
> > > > -        return (void *)__get_free_page(gfp_flags);
> > > > +
> > > > +    page = (void *)__get_free_page(gfp_flags);
> > > > +    if (page && mc->init_value)
> > > > +        memset64(page, mc->init_value, PAGE_SIZE /
> > > > sizeof(mc->init_value));
> > 
> > Do we need a static_assert() to make sure mc->init_value is 64bit?
> 
> That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
> uniformly.

Oops, I picked the wrong mail to reply. Sorry for noise.
Sean Christopherson May 13, 2024, 8:56 p.m. UTC | #6
On Mon, May 13, 2024, Isaku Yamahata wrote:
> On Tue, Mar 26, 2024 at 11:56:35PM +0800, Binbin Wu <binbin.wu@linux.intel.com> wrote:
> > > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > > > index d93f6522b2c3..827ecc0b7e10 100644
> > > > --- a/include/linux/kvm_types.h
> > > > +++ b/include/linux/kvm_types.h
> > > > @@ -86,6 +86,7 @@ struct gfn_to_pfn_cache {
> > > >   struct kvm_mmu_memory_cache {
> > > >       gfp_t gfp_zero;
> > > >       gfp_t gfp_custom;
> > > > +    u64 init_value;
> > > >       struct kmem_cache *kmem_cache;
> > > >       int capacity;
> > > >       int nobjs;
> > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > > index 9c99c9373a3e..c9828feb7a1c 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -401,12 +401,17 @@ static void kvm_flush_shadow_all(struct kvm *kvm)
> > > >   static inline void *mmu_memory_cache_alloc_obj(struct
> > > > kvm_mmu_memory_cache *mc,
> > > >                              gfp_t gfp_flags)
> > > >   {
> > > > +    void *page;
> > > > +
> > > >       gfp_flags |= mc->gfp_zero;
> > > >         if (mc->kmem_cache)
> > > >           return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
> > > > -    else
> > > > -        return (void *)__get_free_page(gfp_flags);
> > > > +
> > > > +    page = (void *)__get_free_page(gfp_flags);
> > > > +    if (page && mc->init_value)
> > > > +        memset64(page, mc->init_value, PAGE_SIZE /
> > > > sizeof(mc->init_value));
> > 
> > Do we need a static_assert() to make sure mc->init_value is 64bit?
> 
> That's overkill because EPT entry is defined as 64bit and KVM uses u64 for it
> uniformly.

I'm pretty sure Binbin is talking about passing init_value to memset64(), not
about whether or not that suffices for EPT.  So I wouldn't say it's overkill.

However, I don't think a static assert is warranted.  Functionally, tracking
init_value as a u32 or even a u8 would be a-ok as it's a copy-by-value parameter
that won't be sign-extended or truncated.  I.e. the real requirement comes from
TDX wanting to set a 64-bit value.

And trying to set bit 63 in a 32-bit field _will_ make the compiler unhappy:

arch/x86/kvm/mmu/mmu.c: In function ‘kvm_mmu_create’:
include/vdso/bits.h:8:33: error: conversion from ‘long long unsigned int’ to ‘u32’ {aka ‘unsigned int’} changes value from ‘9223372036854775808’ to ‘0’ [-Werror=overflow]
    8 | #define BIT_ULL(nr)             (ULL(1) << (nr))
      |                                 ^
arch/x86/kvm/mmu/spte.h:162:33: note: in expansion of macro ‘BIT_ULL’
  162 | #define SHADOW_NONPRESENT_VALUE BIT_ULL(63)
      |                                 ^~~~~~~
arch/x86/kvm/mmu/mmu.c:6225:17: note: in expansion of macro ‘SHADOW_NONPRESENT_VALUE’
 6225 |                 SHADOW_NONPRESENT_VALUE;
      |                 ^~~~~~~~~~~~~~~~~~~~~~~


I suppose one could argue that changing init_value to a u128 could result in
undetected truncation, but IMO that firmly crosses into ridiculous territory.
diff mbox series

Patch

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index d93f6522b2c3..827ecc0b7e10 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -86,6 +86,7 @@  struct gfn_to_pfn_cache {
 struct kvm_mmu_memory_cache {
 	gfp_t gfp_zero;
 	gfp_t gfp_custom;
+	u64 init_value;
 	struct kmem_cache *kmem_cache;
 	int capacity;
 	int nobjs;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9c99c9373a3e..c9828feb7a1c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -401,12 +401,17 @@  static void kvm_flush_shadow_all(struct kvm *kvm)
 static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
 					       gfp_t gfp_flags)
 {
+	void *page;
+
 	gfp_flags |= mc->gfp_zero;
 
 	if (mc->kmem_cache)
 		return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
-	else
-		return (void *)__get_free_page(gfp_flags);
+
+	page = (void *)__get_free_page(gfp_flags);
+	if (page && mc->init_value)
+		memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
+	return page;
 }
 
 int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
@@ -421,6 +426,13 @@  int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
 		if (WARN_ON_ONCE(!capacity))
 			return -EIO;
 
+		/*
+		 * Custom init values can be used only for page allocations,
+		 * and obviously conflict with __GFP_ZERO.
+		 */
+		if (WARN_ON_ONCE(mc->init_value && (mc->kmem_cache || mc->gfp_zero)))
+			return -EIO;
+
 		mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
 		if (!mc->objects)
 			return -ENOMEM;