diff mbox series

KVM: x86: introduce drop_spte_fast() when caller knows rmap_head

Message ID 20180926075853.28715-1-richard.weiyang@gmail.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: introduce drop_spte_fast() when caller knows rmap_head | expand

Commit Message

Wei Yang Sept. 26, 2018, 7:58 a.m. UTC
rmap_remove() removes the sptep after locating the correct
rmap_head, but in several cases the caller already knows the
correct rmap_head.

This patch introduces drop_spte_fast(), which removes the sptep
directly from the rmap_head via pte_list_remove().

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
---
 arch/x86/kvm/mmu.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

Comments

Paolo Bonzini Oct. 1, 2018, 4:41 p.m. UTC | #1
On 26/09/2018 09:58, Wei Yang wrote:
> 
> +/* In case caller knows the rmap_head, just remove it by pte_list_remove() */
> +static void drop_spte_fast(struct kvm_rmap_head *rmap_head, u64 *sptep)
> +{
> +	if (mmu_spte_clear_track_bits(sptep))
> +		pte_list_remove(sptep, rmap_head);
> +}
> +

Here mmu_spte_clear_track_bits cannot return false, because
mmu_spte_clear_track_bits only returns false if
!is_shadow_present_pte(old_spte) and for_each_rmap_spte checks for that.

I think you should call this function pte_list_remove, and rename the
existing pte_list_remove to __pte_list_remove.

> @@ -1669,7 +1676,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
>  	while ((sptep = rmap_get_first(rmap_head, &iter))) {
>  		rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
>  
> -		drop_spte(kvm, sptep);
> +		drop_spte_fast(rmap_head, sptep);
>  		flush = true;
>  	}
>  
> @@ -1705,7 +1712,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
>  		need_flush = 1;
>  
>  		if (pte_write(*ptep)) {
> -			drop_spte(kvm, sptep);
> +			drop_spte_fast(rmap_head, sptep);
>  			goto restart;
>  		} else {
>  			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;

... so this should probably move

	mmu_spte_clear_track_bits(sptep)

from the "else" branch to before the "if", and call __pte_list_remove
directly.

Thanks,

Paolo

> @@ -5598,7 +5605,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
>  		if (sp->role.direct &&
>  			!kvm_is_reserved_pfn(pfn) &&
>  			PageTransCompoundMap(pfn_to_page(pfn))) {
> -			drop_spte(kvm, sptep);
> +			drop_spte_fast(rmap_head, sptep);
>  			need_tlb_flush = 1;
>  			goto restart;
>  		}
Wei Yang Oct. 2, 2018, 12:25 p.m. UTC | #2
On Mon, Oct 01, 2018 at 06:41:21PM +0200, Paolo Bonzini wrote:
>On 26/09/2018 09:58, Wei Yang wrote:
>> 
>> +/* In case caller knows the rmap_head, just remove it by pte_list_remove() */
>> +static void drop_spte_fast(struct kvm_rmap_head *rmap_head, u64 *sptep)
>> +{
>> +	if (mmu_spte_clear_track_bits(sptep))
>> +		pte_list_remove(sptep, rmap_head);
>> +}
>> +

Thanks for your comment.

>
>Here mmu_spte_clear_track_bits cannot return false, because
>mmu_spte_clear_track_bits only returns false if
>!is_shadow_present_pte(old_spte) and for_each_rmap_spte checks for that.
>

I think you are right, missed this point.

BTW, there is one point in mmu_spte_clear_track_bits() that is not clear to me.

In this function, we try to get old_spte in two cases:

  * at the very beginning
  * or use __update_clear_spte_slow() in case has_volatile_bits()

Does this mean there is a chance that someone could change this spte at
the same time? (I have some difficulty understanding this part. I would
appreciate it if you could give me some insight.)

If this is the case, then the old_spte returned from
mmu_spte_clear_track_bits() could be non-present even though
for_each_rmap_spte() has done the check?

>I think you should call this function pte_list_remove, and rename the
>existing pte_list_remove to __pte_list_remove.
>

Sure, I would do this in v2.

>> @@ -1669,7 +1676,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
>>  	while ((sptep = rmap_get_first(rmap_head, &iter))) {
>>  		rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
>>  
>> -		drop_spte(kvm, sptep);
>> +		drop_spte_fast(rmap_head, sptep);
>>  		flush = true;
>>  	}
>>  
>> @@ -1705,7 +1712,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
>>  		need_flush = 1;
>>  
>>  		if (pte_write(*ptep)) {
>> -			drop_spte(kvm, sptep);
>> +			drop_spte_fast(rmap_head, sptep);
>>  			goto restart;
>>  		} else {
>>  			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
>
>... so this should probably move
>
>	mmu_spte_clear_track_bits(sptep)
>
>from the "else" branch to before the "if", and call __pte_list_remove
>directly.
>

Looks reasonable to me, will add this change in v2.

>Thanks,
>
>Paolo
>
>> @@ -5598,7 +5605,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
>>  		if (sp->role.direct &&
>>  			!kvm_is_reserved_pfn(pfn) &&
>>  			PageTransCompoundMap(pfn_to_page(pfn))) {
>> -			drop_spte(kvm, sptep);
>> +			drop_spte_fast(rmap_head, sptep);
>>  			need_tlb_flush = 1;
>>  			goto restart;
>>  		}
Wei Yang Oct. 3, 2018, 1:40 a.m. UTC | #3
On Tue, Oct 02, 2018 at 12:25:18PM +0000, Wei Yang wrote:
>On Mon, Oct 01, 2018 at 06:41:21PM +0200, Paolo Bonzini wrote:
>>On 26/09/2018 09:58, Wei Yang wrote:
>>> 
>>> +/* In case caller knows the rmap_head, just remove it by pte_list_remove() */
>>> +static void drop_spte_fast(struct kvm_rmap_head *rmap_head, u64 *sptep)
>>> +{
>>> +	if (mmu_spte_clear_track_bits(sptep))
>>> +		pte_list_remove(sptep, rmap_head);
>>> +}
>>> +
>
>Thanks for your comment.
>
>>
>>Here mmu_spte_clear_track_bits cannot return false, because
>>mmu_spte_clear_track_bits only returns false if
>>!is_shadow_present_pte(old_spte) and for_each_rmap_spte checks for that.
>>
>
>I think you are right, missed this point.
>
>BTW, I got one point not clear in mmu_spte_clear_track_bits().
>
>In this function, we tries to get old_spte in two cases:
>
>  * at the very beginning
>  * or use __update_clear_spte_slow() in case has_volatile_bits()
>
>This means there is a chance someone would change this spte at the same
>time? (I got some difficulty to understand this part. I would appreciate
>it if you would give me some insight.)
>
>If this is the case, then the old_spte returned from
>mmu_spte_clear_track_bits() would be NULL even for_each_rmap_spte() has
>done the check?
>

Let me try to explain my understanding. If there is any error,
your comments are welcome.

In function spte_has_volatile_bits(), it checks

 * guest/host writable
 * A/D bit is clear, when A/D enabled

If either one is true, a page fault will be triggered when the spte changes.
This means someone else could modify this spte simultaneously.

My conclusion:

    Based on this, it looks possible to get a non-present spte from
    mmu_spte_clear_track_bits() even if it was present.

However, I am still confused about one piece of logic in is_access_track_spte().

It checks (spte & shadow_acc_track_mask), but I searched the project
and only found two places that clear it: mark_spte_for_access_track()
and restore_acc_track_spte(). I am not sure how it is used.
Paolo Bonzini Oct. 3, 2018, 7:48 a.m. UTC | #4
On 03/10/2018 03:40, Wei Yang wrote:
>  * guest/host writable
>  * A/D bit is clear, when A/D enabled
> 
> If either one is true, a page fault will be triggered when spte changes.
> This means someone else would modify this spte simultaneously.

Hi,

page faults happen under mmu_lock, with the exception of "fast page
fault", which changes the accessed and dirty bits.  Therefore, neither a
concurrent present<->nonpresent switch nor a change of
spte_has_volatile_bits(old_spte) can happen during
mmu_spte_clear_track_bits.

Paolo
Wei Yang Oct. 4, 2018, 12:11 a.m. UTC | #5
On Wed, Oct 03, 2018 at 09:48:09AM +0200, Paolo Bonzini wrote:
>On 03/10/2018 03:40, Wei Yang wrote:
>>  * guest/host writable
>>  * A/D bit is clear, when A/D enabled
>> 
>> If either one is true, a page fault will be triggered when spte changes.
>> This means someone else would modify this spte simultaneously.
>
>Hi,
>
>page faults happen under mmu_lock, with the exception of "fast page
>fault" that change the accessed and dirty bits.  Therefore, neither a
>concurent present<->nonpresent switch nor a change of
>spte_has_volatile_bits(old_spte) can happen during
>mmu_spte_clear_track_bits.
>

Thanks for your explanation. :-)

Let me prepare v2.

>Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 371d200ffd4a..9fa77aa24fc7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1412,6 +1412,13 @@  static u64 *rmap_get_next(struct rmap_iterator *iter)
 	for (_spte_ = rmap_get_first(_rmap_head_, _iter_);		\
 	     _spte_; _spte_ = rmap_get_next(_iter_))
 
+/* In case caller knows the rmap_head, just remove it by pte_list_remove() */
+static void drop_spte_fast(struct kvm_rmap_head *rmap_head, u64 *sptep)
+{
+	if (mmu_spte_clear_track_bits(sptep))
+		pte_list_remove(sptep, rmap_head);
+}
+
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
 	if (mmu_spte_clear_track_bits(sptep))
@@ -1669,7 +1676,7 @@  static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
 	while ((sptep = rmap_get_first(rmap_head, &iter))) {
 		rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
 
-		drop_spte(kvm, sptep);
+		drop_spte_fast(rmap_head, sptep);
 		flush = true;
 	}
 
@@ -1705,7 +1712,7 @@  static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 		need_flush = 1;
 
 		if (pte_write(*ptep)) {
-			drop_spte(kvm, sptep);
+			drop_spte_fast(rmap_head, sptep);
 			goto restart;
 		} else {
 			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
@@ -5598,7 +5605,7 @@  static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 		if (sp->role.direct &&
 			!kvm_is_reserved_pfn(pfn) &&
 			PageTransCompoundMap(pfn_to_page(pfn))) {
-			drop_spte(kvm, sptep);
+			drop_spte_fast(rmap_head, sptep);
 			need_tlb_flush = 1;
 			goto restart;
 		}