diff mbox

[09/10,RFC] KVM: x86: MMU: Move parent_pte handling from kvm_mmu_get_page() to link_shadow_page()

Message ID 20151112205656.33f23effece7e6914c3e2646@lab.ntt.co.jp (mailing list archive)
State New, archived
Headers show

Commit Message

Takuya Yoshikawa Nov. 12, 2015, 11:56 a.m. UTC
Every time kvm_mmu_get_page() is called with a non-NULL parent_pte
argument, link_shadow_page() follows that to set the parent entry so
that the new mapping will point to the returned page table.

Moving parent_pte handling there allows to clean up the code because
parent_pte is passed to kvm_mmu_get_page() just for mark_unsync() and
mmu_page_add_parent_pte().

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
---
 arch/x86/kvm/mmu.c         | 21 ++++++++-------------
 arch/x86/kvm/paging_tmpl.h |  6 ++----
 2 files changed, 10 insertions(+), 17 deletions(-)

Comments

Paolo Bonzini Nov. 12, 2015, 2:27 p.m. UTC | #1
On 12/11/2015 12:56, Takuya Yoshikawa wrote:
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 9d21b44..f414ca6 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
>  			goto out_gpte_changed;
>  
>  		if (sp)
> -			link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
> +			link_shadow_page(vcpu, it.sptep, sp, PT_GUEST_ACCESSED_MASK);
>  	}
>  

Here I think you can remove completely the

	if (sp)
		kvm_mmu_put_page(sp, it.sptep);

later in FNAME(fetch).  Apart from this nit, it's okay.

On to kvm_mmu_get_page...

        if (!direct) {
                if (rmap_write_protect(vcpu, gfn))
                        kvm_flush_remote_tlbs(vcpu->kvm);
                if (level > PT_PAGE_TABLE_LEVEL && need_sync)
                        kvm_sync_pages(vcpu, gfn);

This seems fishy.

need_sync is set if sp->unsync, but then the parents have not been
unsynced yet.

On the other hand, all calls to kvm_mmu_get_page except for the
roots are followed by link_shadow_page...  Perhaps if parent_pte != NULL
you can call link_shadow_page directly from kvm_mmu_get_page.  The call
would go before the "if (!direct)" and it would subsume all the existing
calls.

We could probably also warn if

	(parent_pte == NULL)
		!= (level == vcpu->arch.mmu.root_level)

in kvm_mmu_get_page.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Nov. 12, 2015, 5:03 p.m. UTC | #2
On 12/11/2015 15:27, Paolo Bonzini wrote:
> Here I think you can remove completely the
> 
> 	if (sp)
> 		kvm_mmu_put_page(sp, it.sptep);
> 
> later in FNAME(fetch).  Apart from this nit, it's okay.

Removing this is of course not possible anymore if the other suggestion
works out.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Takuya Yoshikawa Nov. 13, 2015, 2:15 a.m. UTC | #3
On 2015/11/12 23:27, Paolo Bonzini wrote:

> On 12/11/2015 12:56, Takuya Yoshikawa wrote:
>> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
>> index 9d21b44..f414ca6 100644
>> --- a/arch/x86/kvm/paging_tmpl.h
>> +++ b/arch/x86/kvm/paging_tmpl.h
>> @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
>>   			goto out_gpte_changed;
>>
>>   		if (sp)
>> -			link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
>> +			link_shadow_page(vcpu, it.sptep, sp, PT_GUEST_ACCESSED_MASK);
>>   	}
>>
>
> Here I think you can remove completely the
>
> 	if (sp)
> 		kvm_mmu_put_page(sp, it.sptep);
>
> later in FNAME(fetch).  Apart from this nit, it's okay.

Yes, that's what this patch does below:

> @@ -629,8 +629,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
>  	return emulate;
>
>  out_gpte_changed:
> -	if (sp)
> -		kvm_mmu_put_page(sp, it.sptep);
>  	kvm_release_pfn_clean(pfn);
>  	return 0;
>  }

Since this is the only user of kvm_mmu_put_page(), it also removes
the definition:

> @@ -2268,11 +2268,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
>  		mmu_page_zap_pte(kvm, sp, sp->spt + i);
>  }
>
> -static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
> -{
> -	mmu_page_remove_parent_pte(sp, parent_pte);
> -}
> -
>  static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	u64 *sptep;

Actually, I don't understand why this is named kvm_mmu_put_page() for
just removing parent_pte pointer from the sp->parent_ptes pointer chain.


> On to kvm_mmu_get_page...
>
>          if (!direct) {
>                  if (rmap_write_protect(vcpu, gfn))
>                          kvm_flush_remote_tlbs(vcpu->kvm);
>                  if (level > PT_PAGE_TABLE_LEVEL && need_sync)
>                          kvm_sync_pages(vcpu, gfn);
>
> This seems fishy.
>
> need_sync is set if sp->unsync, but then the parents have not been
> unsynced yet.

Reaching here means that kvm_mmu_get_page() could not return sp
from inside the for_each_gfn_sp() loop above, so even without
this patch, mark_unsync() has not been called.

Here, sp holds the new page allocated by kvm_mmu_alloc_page().
One confusing thing is that hlist_add_head() right before this
"if (!direct)" line has already added the new sp to the hash
list, so it will be found by for_each_gfn_indirect_valid_sp()
in kvm_sync_pages().

Because this sp is new and sp->unsync is not set,  kvm_sync_pages()
will just skip it and look for other sp's whose ->unsync were found
to be set in the for_each_gfn_sp() loop.

I'm not 100% sure if the existence of the parent_pte pointer in the
newly created sp->parent_ptes chain alone makes any difference:
> @@ -2127,7 +2122,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>  	sp = kvm_mmu_alloc_page(vcpu, direct);
>
>  	sp->parent_ptes.val = 0;
> -	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
>
>  	sp->gfn = gfn;
>  	sp->role = role;


> On the other hand, all calls to kvm_mmu_get_page except for the
> roots are followed by link_shadow_page...  Perhaps if parent_pte != NULL
> you can call link_shadow_page directly from kvm_mmu_get_page.  The call
> would go before the "if (!direct)" and it would subsume all the existing
> calls.
>
> We could probably also warn if
>
> 	(parent_pte == NULL)
> 		!= (level == vcpu->arch.mmu.root_level)
>
> in kvm_mmu_get_page.

I think we should set the spte after init_shadow_page_table(), and
to make this subsume all the existing calls, we need to change the
"return sp;" in the for_each_gfn_sp() loop to a goto statement so
that the end of this function will become something like this:

     init_shadow_page(sp);
out:
     if (parent_pte) {
         mmu_page_add_parent_pte(vcpu, sp, parent_pte);
         link_shadow_page(parent_pte, sp, accessed);
     }
     trace_kvm_mmu_get_page(sp, created);
     return sp;

So, "bool accessed" needs to be passed to kvm_mmu_get_page().
But any way, we need to understand if mmu_page_add_parent_pte()
really needs to be placed before the "if (!direct)" block.

   Takuya


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Nov. 13, 2015, 10:51 a.m. UTC | #4
On 13/11/2015 03:15, Takuya Yoshikawa wrote:
> Actually, I don't understand why this is named kvm_mmu_put_page() for
> just removing parent_pte pointer from the sp->parent_ptes pointer chain.

Because it undoes kvm_mmu_get_page, I guess. :)

> 
>> On to kvm_mmu_get_page...
>>
>>          if (!direct) {
>>                  if (rmap_write_protect(vcpu, gfn))
>>                          kvm_flush_remote_tlbs(vcpu->kvm);
>>                  if (level > PT_PAGE_TABLE_LEVEL && need_sync)
>>                          kvm_sync_pages(vcpu, gfn);
>>
>> This seems fishy.
>>
>> need_sync is set if sp->unsync, but then the parents have not been
>> unsynced yet.
> 
> Reaching here means that kvm_mmu_get_page() could not return sp
> from inside the for_each_gfn_sp() loop above, so even without
> this patch, mark_unsync() has not been called.

You're right.

> Here, sp holds the new page allocated by kvm_mmu_alloc_page().
> One confusing thing is that hlist_add_head() right before this
> "if (!direct)" line has already added the new sp to the hash
> list, so it will be found by for_each_gfn_indirect_valid_sp()
> in kvm_sync_pages().
> 
> Because this sp is new and sp->unsync is not set,  kvm_sync_pages()
> will just skip it and look for other sp's whose ->unsync were found
> to be set in the for_each_gfn_sp() loop.
> 
> I'm not 100% sure if the existence of the parent_pte pointer in the
> newly created sp->parent_ptes chain alone makes any difference:

No, I don't think so.  Nothing needs the parent_ptes at this point:

- kvm_mmu_mark_parents_unsync, even in the existing code, it's called
before the new SPTE is created.

- as you said, kvm_mmu_prepare_zap_page can be called by kvm_sync_pages
but it will not operate on this page because its ->unsync is zero.

> So, "bool accessed" needs to be passed to kvm_mmu_get_page(). 

The "bool accessed" parameter is not necessary, I think.  It is only
false in the nested EPT case, and there's no reason not to set the
accessed bit *in the shadow page* if the host supports EPT
accessed/dirty bits.  I'll test and send a patch to remove the argument.

> But any way, we need to understand if mmu_page_add_parent_pte()
> really needs to be placed before the "if (!direct)" block.

No, I don't think so anymore.

I think these patches are fine as a starting point for further cleanups,
I'll push them to kvm/queue very soon.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Nov. 18, 2015, 3:32 a.m. UTC | #5
On 11/12/2015 07:56 PM, Takuya Yoshikawa wrote:
> Every time kvm_mmu_get_page() is called with a non-NULL parent_pte
> argument, link_shadow_page() follows that to set the parent entry so
> that the new mapping will point to the returned page table.
>
> Moving parent_pte handling there allows to clean up the code because
> parent_pte is passed to kvm_mmu_get_page() just for mark_unsync() and
> mmu_page_add_parent_pte().
>
> Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
> ---
>   arch/x86/kvm/mmu.c         | 21 ++++++++-------------
>   arch/x86/kvm/paging_tmpl.h |  6 ++----
>   2 files changed, 10 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9273cd4..33fe720 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2108,14 +2108,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>   		if (sp->unsync_children) {
>   			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
>   			kvm_mmu_mark_parents_unsync(sp);
> -			if (parent_pte)
> -				mark_unsync(parent_pte);
>   		} else if (sp->unsync) {
>   			kvm_mmu_mark_parents_unsync(sp);
> -			if (parent_pte)
> -				mark_unsync(parent_pte);
>   		}
> -		mmu_page_add_parent_pte(vcpu, sp, parent_pte);
>
>   		__clear_sp_write_flooding_count(sp);
>   		trace_kvm_mmu_get_page(sp, false);
> @@ -2127,7 +2122,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>   	sp = kvm_mmu_alloc_page(vcpu, direct);
>
>   	sp->parent_ptes.val = 0;
> -	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
>
>   	sp->gfn = gfn;
>   	sp->role = role;
> @@ -2196,7 +2190,8 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
>   	return __shadow_walk_next(iterator, *iterator->sptep);
>   }
>
> -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
> +static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
> +			     struct kvm_mmu_page *sp, bool accessed)
>   {
>   	u64 spte;
>
> @@ -2210,6 +2205,11 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
>   		spte |= shadow_accessed_mask;
>
>   	mmu_spte_set(sptep, spte);
> +
> +	if (sp->unsync_children || sp->unsync)
> +		mark_unsync(sptep);

Why are these needed?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9273cd4..33fe720 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2108,14 +2108,9 @@  static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		if (sp->unsync_children) {
 			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
 			kvm_mmu_mark_parents_unsync(sp);
-			if (parent_pte)
-				mark_unsync(parent_pte);
 		} else if (sp->unsync) {
 			kvm_mmu_mark_parents_unsync(sp);
-			if (parent_pte)
-				mark_unsync(parent_pte);
 		}
-		mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 
 		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
@@ -2127,7 +2122,6 @@  static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	sp = kvm_mmu_alloc_page(vcpu, direct);
 
 	sp->parent_ptes.val = 0;
-	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 
 	sp->gfn = gfn;
 	sp->role = role;
@@ -2196,7 +2190,8 @@  static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 	return __shadow_walk_next(iterator, *iterator->sptep);
 }
 
-static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
+static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
+			     struct kvm_mmu_page *sp, bool accessed)
 {
 	u64 spte;
 
@@ -2210,6 +2205,11 @@  static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
 		spte |= shadow_accessed_mask;
 
 	mmu_spte_set(sptep, spte);
+
+	if (sp->unsync_children || sp->unsync)
+		mark_unsync(sptep);
+
+	mmu_page_add_parent_pte(vcpu, sp, sptep);
 }
 
 static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
@@ -2268,11 +2268,6 @@  static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 		mmu_page_zap_pte(kvm, sp, sp->spt + i);
 }
 
-static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
-{
-	mmu_page_remove_parent_pte(sp, parent_pte);
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *sptep;
@@ -2738,7 +2733,7 @@  static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
 					      iterator.level - 1,
 					      1, ACC_ALL, iterator.sptep);
 
-			link_shadow_page(iterator.sptep, sp, true);
+			link_shadow_page(vcpu, iterator.sptep, sp, true);
 		}
 	}
 	return emulate;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9d21b44..f414ca6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -598,7 +598,7 @@  static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			goto out_gpte_changed;
 
 		if (sp)
-			link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
+			link_shadow_page(vcpu, it.sptep, sp, PT_GUEST_ACCESSED_MASK);
 	}
 
 	for (;
@@ -618,7 +618,7 @@  static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
 				      true, direct_access, it.sptep);
-		link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
+		link_shadow_page(vcpu, it.sptep, sp, PT_GUEST_ACCESSED_MASK);
 	}
 
 	clear_sp_write_flooding_count(it.sptep);
@@ -629,8 +629,6 @@  static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	return emulate;
 
 out_gpte_changed:
-	if (sp)
-		kvm_mmu_put_page(sp, it.sptep);
 	kvm_release_pfn_clean(pfn);
 	return 0;
 }