
[RFC,06/15] KVM: x86/mmu: Derive page role from parent

Message ID 20211119235759.1304274-7-dmatlack@google.com (mailing list archive)
State New, archived
Series KVM: x86/mmu: Eager Page Splitting for the TDP MMU

Commit Message

David Matlack Nov. 19, 2021, 11:57 p.m. UTC
Derive the page role from the parent shadow page, since the only thing
that changes is the level. This is in preparation for eagerly splitting
large pages during VM-ioctls, which do not have access to the vCPU
MMU context.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 43 ++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 20 deletions(-)
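
To illustrate what this enables: once the child role is derived from the
parent shadow page, a later patch can allocate child pages from a
VM-ioctl path with no vCPU in hand. A minimal sketch, assuming a
hypothetical __alloc_tdp_mmu_page() that takes a struct kvm rather than
a vCPU (not code from this patch):

	/*
	 * Hypothetical sketch: build the child page role purely from the
	 * parent shadow page, so no vcpu->arch.mmu access is needed.
	 */
	static struct kvm_mmu_page *
	alloc_child_for_split(struct kvm *kvm, struct tdp_iter *iter)
	{
		struct kvm_mmu_page *parent_sp;
		union kvm_mmu_page_role role;

		parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));

		role = parent_sp->role;
		role.level--;	/* the child differs only in level */

		return __alloc_tdp_mmu_page(kvm, iter->gfn, role);
	}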

Comments

Paolo Bonzini Nov. 20, 2021, 12:53 p.m. UTC | #1
On 11/20/21 00:57, David Matlack wrote:
> Derive the page role from the parent shadow page, since the only thing
> that changes is the level. This is in preparation for eagerly splitting
> large pages during VM-ioctls, which do not have access to the vCPU
> MMU context.
> 
> No functional change intended.
> 
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
>   arch/x86/kvm/mmu/tdp_mmu.c | 43 ++++++++++++++++++++------------------
>   1 file changed, 23 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index b70707a7fe87..1a409992a57f 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -157,23 +157,8 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
>   		if (kvm_mmu_page_as_id(_root) != _as_id) {		\
>   		} else
>   
> -static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
> -						   int level)
> -{
> -	union kvm_mmu_page_role role;
> -
> -	role = vcpu->arch.mmu->mmu_role.base;
> -	role.level = level;
> -	role.direct = true;
> -	role.gpte_is_8_bytes = true;
> -	role.access = ACC_ALL;
> -	role.ad_disabled = !shadow_accessed_mask;
> -
> -	return role;
> -}
> -
>   static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
> -					       int level)
> +					       union kvm_mmu_page_role role)
>   {
>   	struct kvm_mmu_memory_caches *mmu_caches;
>   	struct kvm_mmu_page *sp;
> @@ -184,7 +169,7 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
>   	sp->spt = kvm_mmu_memory_cache_alloc(&mmu_caches->shadow_page_cache);
>   	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>   
> -	sp->role.word = page_role_for_level(vcpu, level).word;
> +	sp->role = role;
>   	sp->gfn = gfn;
>   	sp->tdp_mmu_page = true;
>   
> @@ -193,6 +178,19 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
>   	return sp;
>   }
>   
> +static struct kvm_mmu_page *alloc_child_tdp_mmu_page(struct kvm_vcpu *vcpu, struct tdp_iter *iter)
> +{
> +	struct kvm_mmu_page *parent_sp;
> +	union kvm_mmu_page_role role;
> +
> +	parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
> +
> +	role = parent_sp->role;
> +	role.level--;
> +
> +	return alloc_tdp_mmu_page(vcpu, iter->gfn, role);
> +}
> +
>   hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
>   {
>   	union kvm_mmu_page_role role;
> @@ -201,7 +199,12 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
>   
>   	lockdep_assert_held_write(&kvm->mmu_lock);
>   
> -	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);
> +	role = vcpu->arch.mmu->mmu_role.base;
> +	role.level = vcpu->arch.mmu->shadow_root_level;
> +	role.direct = true;
> +	role.gpte_is_8_bytes = true;
> +	role.access = ACC_ALL;
> +	role.ad_disabled = !shadow_accessed_mask;

I have a similar patch for the old MMU, but it was also replacing 
shadow_root_level with shadow_root_role.  I'll see if I can adapt it to 
the TDP MMU, since the shadow_root_role is obviously the same for both.

Paolo

>   	/* Check for an existing root before allocating a new one. */
>   	for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
> @@ -210,7 +213,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
>   			goto out;
>   	}
>   
> -	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
> +	root = alloc_tdp_mmu_page(vcpu, 0, role);
>   	refcount_set(&root->tdp_mmu_root_count, 1);
>   
>   	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> @@ -1028,7 +1031,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>   			if (is_removed_spte(iter.old_spte))
>   				break;
>   
> -			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level - 1);
> +			sp = alloc_child_tdp_mmu_page(vcpu, &iter);
>   			if (!tdp_mmu_install_sp_atomic(vcpu->kvm, &iter, sp, account_nx))
>   				break;
>   		}
>
Lai Jiangshan Nov. 27, 2021, 2:07 a.m. UTC | #2
On Sat, Nov 20, 2021 at 9:02 PM Paolo Bonzini <pbonzini@redhat.com> wrote:

>
> I have a similar patch for the old MMU, but it was also replacing
> shadow_root_level with shadow_root_role.  I'll see if I can adapt it to
> the TDP MMU, since the shadow_root_role is obviously the same for both.
>

Hello, Paolo

I'm sorry to ask something unrelated to this patchset, but related
to my pending work.

I will still do some work on shadow_root_level, but I would like to
wait until your shadow_root_role work is queued. Is it part of the
work splitting struct kvm_mmu?

Thanks
Lai
Paolo Bonzini Nov. 27, 2021, 10:26 a.m. UTC | #3
On 11/27/21 03:07, Lai Jiangshan wrote:
> On Sat, Nov 20, 2021 at 9:02 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
>>
>> I have a similar patch for the old MMU, but it was also replacing
>> shadow_root_level with shadow_root_role.  I'll see if I can adapt it to
>> the TDP MMU, since the shadow_root_role is obviously the same for both.
>>
> 
> Hello, Paolo
> 
> I'm sorry to ask something unrelated to this patchset, but related
> to my pending work.
> 
> I will still do some work on shadow_root_level, but I would like to
> wait until your shadow_root_role work is queued. Is it part of the
> work splitting struct kvm_mmu?

Yes, more or less.  I'm basically splitting the "CPU role" (the basic 
and extended role from the processor registers) used for emulation, from 
the "MMU role" (the basic role used for the root shadow page tables). 
Then shadow_root_level/root_level become respectively mmu_role->level 
and cpu_role->base.level.
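
As a rough sketch of that split (field and accessor names here are
illustrative, not necessarily what the actual series will use):

	/*
	 * Illustrative sketch only.  The "CPU role" is computed from the
	 * guest's processor registers and drives emulation; the "MMU role"
	 * describes the root shadow page tables KVM actually builds.
	 */
	union kvm_mmu_role	cpu_role;	/* base + extended role */
	union kvm_mmu_page_role	mmu_role;	/* root shadow page role */

	static inline int shadow_root_level(struct kvm_mmu *mmu)
	{
		return mmu->mmu_role.level;		/* was mmu->shadow_root_level */
	}

	static inline int root_level(struct kvm_mmu *mmu)
	{
		return mmu->cpu_role.base.level;	/* was mmu->root_level */
	}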

Paolo
David Matlack Nov. 30, 2021, 11:31 p.m. UTC | #4
On Sat, Nov 20, 2021 at 4:53 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 11/20/21 00:57, David Matlack wrote:
> > Derive the page role from the parent shadow page, since the only thing
> > that changes is the level. This is in preparation for eagerly splitting
> > large pages during VM-ioctls, which do not have access to the vCPU
> > MMU context.
> >
> > No functional change intended.
> >
> > Signed-off-by: David Matlack <dmatlack@google.com>
> > ---
> >   arch/x86/kvm/mmu/tdp_mmu.c | 43 ++++++++++++++++++++------------------
> >   1 file changed, 23 insertions(+), 20 deletions(-)
> >
> > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> > index b70707a7fe87..1a409992a57f 100644
> > --- a/arch/x86/kvm/mmu/tdp_mmu.c
> > +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> > @@ -157,23 +157,8 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
> >               if (kvm_mmu_page_as_id(_root) != _as_id) {              \
> >               } else
> >
> > -static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
> > -                                                int level)
> > -{
> > -     union kvm_mmu_page_role role;
> > -
> > -     role = vcpu->arch.mmu->mmu_role.base;
> > -     role.level = level;
> > -     role.direct = true;
> > -     role.gpte_is_8_bytes = true;
> > -     role.access = ACC_ALL;
> > -     role.ad_disabled = !shadow_accessed_mask;
> > -
> > -     return role;
> > -}
> > -
> >   static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
> > -                                            int level)
> > +                                            union kvm_mmu_page_role role)
> >   {
> >       struct kvm_mmu_memory_caches *mmu_caches;
> >       struct kvm_mmu_page *sp;
> > @@ -184,7 +169,7 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
> >       sp->spt = kvm_mmu_memory_cache_alloc(&mmu_caches->shadow_page_cache);
> >       set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
> >
> > -     sp->role.word = page_role_for_level(vcpu, level).word;
> > +     sp->role = role;
> >       sp->gfn = gfn;
> >       sp->tdp_mmu_page = true;
> >
> > @@ -193,6 +178,19 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
> >       return sp;
> >   }
> >
> > +static struct kvm_mmu_page *alloc_child_tdp_mmu_page(struct kvm_vcpu *vcpu, struct tdp_iter *iter)
> > +{
> > +     struct kvm_mmu_page *parent_sp;
> > +     union kvm_mmu_page_role role;
> > +
> > +     parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
> > +
> > +     role = parent_sp->role;
> > +     role.level--;
> > +
> > +     return alloc_tdp_mmu_page(vcpu, iter->gfn, role);
> > +}
> > +
> >   hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
> >   {
> >       union kvm_mmu_page_role role;
> > @@ -201,7 +199,12 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
> >
> >       lockdep_assert_held_write(&kvm->mmu_lock);
> >
> > -     role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);
> > +     role = vcpu->arch.mmu->mmu_role.base;
> > +     role.level = vcpu->arch.mmu->shadow_root_level;
> > +     role.direct = true;
> > +     role.gpte_is_8_bytes = true;
> > +     role.access = ACC_ALL;
> > +     role.ad_disabled = !shadow_accessed_mask;
>
> I have a similar patch for the old MMU, but it was also replacing
> shadow_root_level with shadow_root_role.  I'll see if I can adapt it to
> the TDP MMU, since the shadow_root_role is obviously the same for both.

While I was writing this patch it got me wondering if we can do an
even more general refactor and replace root_hpa and shadow_root_level
with a pointer to the root kvm_mmu_page struct. But I didn't get a
chance to look into it further.


>
> Paolo
>
> >       /* Check for an existing root before allocating a new one. */
> >       for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
> > @@ -210,7 +213,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
> >                       goto out;
> >       }
> >
> > -     root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
> > +     root = alloc_tdp_mmu_page(vcpu, 0, role);
> >       refcount_set(&root->tdp_mmu_root_count, 1);
> >
> >       spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> > @@ -1028,7 +1031,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> >                       if (is_removed_spte(iter.old_spte))
> >                               break;
> >
> > -                     sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level - 1);
> > +                     sp = alloc_child_tdp_mmu_page(vcpu, &iter);
> >                       if (!tdp_mmu_install_sp_atomic(vcpu->kvm, &iter, sp, account_nx))
> >                               break;
> >               }
> >
Sean Christopherson Dec. 1, 2021, 12:45 a.m. UTC | #5
On Tue, Nov 30, 2021, David Matlack wrote:
> > I have a similar patch for the old MMU, but it was also replacing
> > shadow_root_level with shadow_root_role.  I'll see if I can adapt it to
> > the TDP MMU, since the shadow_root_role is obviously the same for both.
> 
> While I was writing this patch it got me wondering if we can do an
> even more general refactor and replace root_hpa and shadow_root_level
> with a pointer to the root kvm_mmu_page struct. But I didn't get a
> chance to look into it further.

For TDP MMU, yes, as root_hpa == __pa(sp->spt) in all cases.  For the legacy/full
MMU, not without additional refactoring since root_hpa doesn't point at a kvm_mmu_page
when KVM shadows a non-paging guest with PAE paging (uses pae_root), or when KVM
shadows nested NPT and the guest is using fewer paging levels than the host (uses
pml5_root or pml4_root).

	if (mmu->shadow_root_level == PT64_ROOT_5LEVEL)
		mmu->root_hpa = __pa(mmu->pml5_root);
	else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
		mmu->root_hpa = __pa(mmu->pml4_root);
	else
		mmu->root_hpa = __pa(mmu->pae_root);

That's definitely a solvable problem, e.g. it wouldn't be a problem to burn a few
kvm_mmu_page for the special root.  The biggest issue is probably the sheer amount
of code that would need to be updated.  I do think it would be a good change, but
I think we'd want to do it in a release that isn't expected to have many other MMU
changes.
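
For illustration, one hypothetical way to get there (not proposed code):
give each special root a backing kvm_mmu_page, at which point a single
struct kvm_mmu_page *root_sp could replace root_hpa for every MMU flavor:

	/*
	 * Hypothetical sketch: assumes pae_root/pml4_root/pml5_root each
	 * gained a backing kvm_mmu_page, so mmu->root_sp is valid in all
	 * cases and root_hpa becomes derived state.
	 */
	static hpa_t mmu_root_hpa(struct kvm_mmu *mmu)
	{
		/* root_hpa == __pa(sp->spt) already holds for TDP MMU roots. */
		return __pa(mmu->root_sp->spt);
	}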

shadow_root_level can also be replaced by mmu_role.base.level.  I've never bothered
to do the replacement because there's zero memory savings and it would undoubtedly
take me some time to retrain my brain :-)
David Matlack Dec. 1, 2021, 9:56 p.m. UTC | #6
On Tue, Nov 30, 2021 at 4:45 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Nov 30, 2021, David Matlack wrote:
> > > I have a similar patch for the old MMU, but it was also replacing
> > > shadow_root_level with shadow_root_role.  I'll see if I can adapt it to
> > > the TDP MMU, since the shadow_root_role is obviously the same for both.
> >
> > While I was writing this patch it got me wondering if we can do an
> > even more general refactor and replace root_hpa and shadow_root_level
> > with a pointer to the root kvm_mmu_page struct. But I didn't get a
> > chance to look into it further.
>
> For TDP MMU, yes, as root_hpa == __pa(sp->spt) in all cases.  For the legacy/full
> MMU, not without additional refactoring since root_hpa doesn't point at a kvm_mmu_page
> when KVM shadows a non-paging guest with PAE paging (uses pae_root), or when KVM
> shadows nested NPT and the guest is using fewer paging levels than the host (uses
> pml5_root or pml4_root).
>
>         if (mmu->shadow_root_level == PT64_ROOT_5LEVEL)
>                 mmu->root_hpa = __pa(mmu->pml5_root);
>         else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
>                 mmu->root_hpa = __pa(mmu->pml4_root);
>         else
>                 mmu->root_hpa = __pa(mmu->pae_root);
>
> That's definitely a solvable problem, e.g. it wouldn't be a problem to burn a few
> kvm_mmu_page for the special root.  The biggest issue is probably the sheer amount
> of code that would need to be updated.  I do think it would be a good change, but
> I think we'd want to do it in a release that isn't expected to have many other MMU
> changes.

Thanks for the explanation! I had a feeling this refactor would start
getting hairy when I ventured outside of the TDP MMU.

>
> shadow_root_level can also be replaced by mmu_role.base.level.  I've never bothered
> to do the replacement because there's zero memory savings and it would undoubtedly
> take me some time to retrain my brain :-)

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index b70707a7fe87..1a409992a57f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -157,23 +157,8 @@  static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
 		if (kvm_mmu_page_as_id(_root) != _as_id) {		\
 		} else
 
-static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
-						   int level)
-{
-	union kvm_mmu_page_role role;
-
-	role = vcpu->arch.mmu->mmu_role.base;
-	role.level = level;
-	role.direct = true;
-	role.gpte_is_8_bytes = true;
-	role.access = ACC_ALL;
-	role.ad_disabled = !shadow_accessed_mask;
-
-	return role;
-}
-
 static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
-					       int level)
+					       union kvm_mmu_page_role role)
 {
 	struct kvm_mmu_memory_caches *mmu_caches;
 	struct kvm_mmu_page *sp;
@@ -184,7 +169,7 @@  static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 	sp->spt = kvm_mmu_memory_cache_alloc(&mmu_caches->shadow_page_cache);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
-	sp->role.word = page_role_for_level(vcpu, level).word;
+	sp->role = role;
 	sp->gfn = gfn;
 	sp->tdp_mmu_page = true;
 
@@ -193,6 +178,19 @@  static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return sp;
 }
 
+static struct kvm_mmu_page *alloc_child_tdp_mmu_page(struct kvm_vcpu *vcpu, struct tdp_iter *iter)
+{
+	struct kvm_mmu_page *parent_sp;
+	union kvm_mmu_page_role role;
+
+	parent_sp = sptep_to_sp(rcu_dereference(iter->sptep));
+
+	role = parent_sp->role;
+	role.level--;
+
+	return alloc_tdp_mmu_page(vcpu, iter->gfn, role);
+}
+
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 {
 	union kvm_mmu_page_role role;
@@ -201,7 +199,12 @@  hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
-	role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);
+	role = vcpu->arch.mmu->mmu_role.base;
+	role.level = vcpu->arch.mmu->shadow_root_level;
+	role.direct = true;
+	role.gpte_is_8_bytes = true;
+	role.access = ACC_ALL;
+	role.ad_disabled = !shadow_accessed_mask;
 
 	/* Check for an existing root before allocating a new one. */
 	for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
@@ -210,7 +213,7 @@  hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 			goto out;
 	}
 
-	root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
+	root = alloc_tdp_mmu_page(vcpu, 0, role);
 	refcount_set(&root->tdp_mmu_root_count, 1);
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1028,7 +1031,7 @@  int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			if (is_removed_spte(iter.old_spte))
 				break;
 
-			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level - 1);
+			sp = alloc_child_tdp_mmu_page(vcpu, &iter);
 			if (!tdp_mmu_install_sp_atomic(vcpu->kvm, &iter, sp, account_nx))
 				break;
 		}